{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.7002356226546818, "eval_steps": 500, "global_step": 92826, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005817843325479244, "grad_norm": 14.125, "learning_rate": 9.998063627249894e-06, "loss": 1.8693, "step": 20 }, { "epoch": 0.0011635686650958489, "grad_norm": 11.625, "learning_rate": 9.996124345577279e-06, "loss": 1.767, "step": 40 }, { "epoch": 0.0017453529976437735, "grad_norm": 13.125, "learning_rate": 9.994185063904664e-06, "loss": 1.7496, "step": 60 }, { "epoch": 0.0023271373301916977, "grad_norm": 13.3125, "learning_rate": 9.992245782232049e-06, "loss": 1.7907, "step": 80 }, { "epoch": 0.0029089216627396224, "grad_norm": 12.125, "learning_rate": 9.990306500559434e-06, "loss": 1.6286, "step": 100 }, { "epoch": 0.003490705995287547, "grad_norm": 12.4375, "learning_rate": 9.98836721888682e-06, "loss": 1.8037, "step": 120 }, { "epoch": 0.004072490327835471, "grad_norm": 12.5625, "learning_rate": 9.986427937214204e-06, "loss": 1.7154, "step": 140 }, { "epoch": 0.0046542746603833954, "grad_norm": 14.4375, "learning_rate": 9.98448865554159e-06, "loss": 1.7231, "step": 160 }, { "epoch": 0.0052360589929313205, "grad_norm": 9.5625, "learning_rate": 9.982549373868974e-06, "loss": 1.701, "step": 180 }, { "epoch": 0.005817843325479245, "grad_norm": 12.625, "learning_rate": 9.98061009219636e-06, "loss": 1.7652, "step": 200 }, { "epoch": 0.006399627658027169, "grad_norm": 12.6875, "learning_rate": 9.978670810523745e-06, "loss": 1.7116, "step": 220 }, { "epoch": 0.006981411990575094, "grad_norm": 12.6875, "learning_rate": 9.97673152885113e-06, "loss": 1.6567, "step": 240 }, { "epoch": 0.007563196323123018, "grad_norm": 14.8125, "learning_rate": 9.974792247178515e-06, "loss": 1.7739, "step": 260 }, { "epoch": 0.008144980655670942, "grad_norm": 13.0625, "learning_rate": 9.9728529655059e-06, "loss": 1.9183, "step": 280 }, { "epoch": 0.008726764988218868, "grad_norm": 14.375, "learning_rate": 9.970913683833285e-06, "loss": 1.7885, "step": 300 }, { "epoch": 0.009308549320766791, "grad_norm": 13.3125, "learning_rate": 9.96897440216067e-06, "loss": 1.7945, "step": 320 }, { "epoch": 0.009890333653314716, "grad_norm": 12.5625, "learning_rate": 9.967035120488055e-06, "loss": 1.7623, "step": 340 }, { "epoch": 0.010472117985862641, "grad_norm": 13.375, "learning_rate": 9.96509583881544e-06, "loss": 1.6728, "step": 360 }, { "epoch": 0.011053902318410564, "grad_norm": 13.3125, "learning_rate": 9.963156557142825e-06, "loss": 1.7254, "step": 380 }, { "epoch": 0.01163568665095849, "grad_norm": 15.5, "learning_rate": 9.96121727547021e-06, "loss": 1.6798, "step": 400 }, { "epoch": 0.012217470983506415, "grad_norm": 12.6875, "learning_rate": 9.959277993797596e-06, "loss": 1.7626, "step": 420 }, { "epoch": 0.012799255316054338, "grad_norm": 13.0625, "learning_rate": 9.95733871212498e-06, "loss": 1.823, "step": 440 }, { "epoch": 0.013381039648602263, "grad_norm": 11.75, "learning_rate": 9.955399430452366e-06, "loss": 1.688, "step": 460 }, { "epoch": 0.013962823981150188, "grad_norm": 13.3125, "learning_rate": 9.953460148779751e-06, "loss": 1.762, "step": 480 }, { "epoch": 0.014544608313698111, "grad_norm": 12.1875, "learning_rate": 9.951520867107136e-06, "loss": 1.6487, "step": 500 }, { "epoch": 0.015126392646246037, "grad_norm": 15.0625, "learning_rate": 9.949581585434521e-06, "loss": 1.8255, "step": 520 }, { "epoch": 0.01570817697879396, "grad_norm": 13.4375, "learning_rate": 9.947642303761906e-06, "loss": 1.7325, "step": 540 }, { "epoch": 0.016289961311341885, "grad_norm": 15.25, "learning_rate": 9.945703022089291e-06, "loss": 1.8285, "step": 560 }, { "epoch": 0.01687174564388981, "grad_norm": 12.375, "learning_rate": 9.943763740416676e-06, "loss": 1.7398, "step": 580 }, { "epoch": 0.017453529976437735, "grad_norm": 16.625, "learning_rate": 9.941824458744062e-06, "loss": 1.7851, "step": 600 }, { "epoch": 0.01803531430898566, "grad_norm": 11.875, "learning_rate": 9.939885177071447e-06, "loss": 1.7592, "step": 620 }, { "epoch": 0.018617098641533582, "grad_norm": 16.875, "learning_rate": 9.937945895398832e-06, "loss": 1.7966, "step": 640 }, { "epoch": 0.019198882974081507, "grad_norm": 10.9375, "learning_rate": 9.936006613726217e-06, "loss": 1.704, "step": 660 }, { "epoch": 0.019780667306629432, "grad_norm": 18.5, "learning_rate": 9.934067332053602e-06, "loss": 1.7116, "step": 680 }, { "epoch": 0.020362451639177357, "grad_norm": 14.6875, "learning_rate": 9.932128050380987e-06, "loss": 1.7695, "step": 700 }, { "epoch": 0.020944235971725282, "grad_norm": 12.625, "learning_rate": 9.930188768708372e-06, "loss": 1.7622, "step": 720 }, { "epoch": 0.021526020304273207, "grad_norm": 13.3125, "learning_rate": 9.928249487035757e-06, "loss": 1.7275, "step": 740 }, { "epoch": 0.02210780463682113, "grad_norm": 12.5, "learning_rate": 9.926310205363142e-06, "loss": 1.6935, "step": 760 }, { "epoch": 0.022689588969369054, "grad_norm": 12.75, "learning_rate": 9.924370923690527e-06, "loss": 1.8071, "step": 780 }, { "epoch": 0.02327137330191698, "grad_norm": 13.9375, "learning_rate": 9.922431642017912e-06, "loss": 1.7624, "step": 800 }, { "epoch": 0.023853157634464904, "grad_norm": 12.375, "learning_rate": 9.920492360345298e-06, "loss": 1.7581, "step": 820 }, { "epoch": 0.02443494196701283, "grad_norm": 11.625, "learning_rate": 9.918553078672683e-06, "loss": 1.7039, "step": 840 }, { "epoch": 0.025016726299560754, "grad_norm": 12.125, "learning_rate": 9.916613797000068e-06, "loss": 1.7564, "step": 860 }, { "epoch": 0.025598510632108676, "grad_norm": 13.875, "learning_rate": 9.914674515327453e-06, "loss": 1.7778, "step": 880 }, { "epoch": 0.0261802949646566, "grad_norm": 12.25, "learning_rate": 9.912735233654838e-06, "loss": 1.7182, "step": 900 }, { "epoch": 0.026762079297204526, "grad_norm": 14.4375, "learning_rate": 9.910795951982223e-06, "loss": 1.6581, "step": 920 }, { "epoch": 0.02734386362975245, "grad_norm": 13.5625, "learning_rate": 9.908856670309608e-06, "loss": 1.7495, "step": 940 }, { "epoch": 0.027925647962300376, "grad_norm": 13.125, "learning_rate": 9.906917388636993e-06, "loss": 1.75, "step": 960 }, { "epoch": 0.0285074322948483, "grad_norm": 13.4375, "learning_rate": 9.904978106964378e-06, "loss": 1.8321, "step": 980 }, { "epoch": 0.029089216627396223, "grad_norm": 12.0625, "learning_rate": 9.903038825291763e-06, "loss": 1.7237, "step": 1000 }, { "epoch": 0.029671000959944148, "grad_norm": 14.0625, "learning_rate": 9.901099543619149e-06, "loss": 1.7602, "step": 1020 }, { "epoch": 0.030252785292492073, "grad_norm": 10.375, "learning_rate": 9.899160261946534e-06, "loss": 1.7526, "step": 1040 }, { "epoch": 0.030834569625039998, "grad_norm": 12.0625, "learning_rate": 9.897220980273919e-06, "loss": 1.6564, "step": 1060 }, { "epoch": 0.03141635395758792, "grad_norm": 14.0, "learning_rate": 9.895281698601304e-06, "loss": 1.7071, "step": 1080 }, { "epoch": 0.03199813829013585, "grad_norm": 13.25, "learning_rate": 9.893342416928689e-06, "loss": 1.7021, "step": 1100 }, { "epoch": 0.03257992262268377, "grad_norm": 14.5625, "learning_rate": 9.891403135256074e-06, "loss": 1.6877, "step": 1120 }, { "epoch": 0.0331617069552317, "grad_norm": 12.5625, "learning_rate": 9.88946385358346e-06, "loss": 1.6961, "step": 1140 }, { "epoch": 0.03374349128777962, "grad_norm": 13.0625, "learning_rate": 9.887524571910844e-06, "loss": 1.7913, "step": 1160 }, { "epoch": 0.03432527562032754, "grad_norm": 13.125, "learning_rate": 9.88558529023823e-06, "loss": 1.7257, "step": 1180 }, { "epoch": 0.03490705995287547, "grad_norm": 14.125, "learning_rate": 9.883646008565614e-06, "loss": 1.7231, "step": 1200 }, { "epoch": 0.03548884428542339, "grad_norm": 12.75, "learning_rate": 9.881706726893e-06, "loss": 1.744, "step": 1220 }, { "epoch": 0.03607062861797132, "grad_norm": 12.25, "learning_rate": 9.879767445220385e-06, "loss": 1.6365, "step": 1240 }, { "epoch": 0.03665241295051924, "grad_norm": 9.375, "learning_rate": 9.87782816354777e-06, "loss": 1.7705, "step": 1260 }, { "epoch": 0.037234197283067164, "grad_norm": 10.8125, "learning_rate": 9.875888881875155e-06, "loss": 1.7309, "step": 1280 }, { "epoch": 0.03781598161561509, "grad_norm": 11.0, "learning_rate": 9.87394960020254e-06, "loss": 1.7818, "step": 1300 }, { "epoch": 0.038397765948163014, "grad_norm": 13.6875, "learning_rate": 9.872010318529925e-06, "loss": 1.7461, "step": 1320 }, { "epoch": 0.03897955028071094, "grad_norm": 13.8125, "learning_rate": 9.87007103685731e-06, "loss": 1.7633, "step": 1340 }, { "epoch": 0.039561334613258864, "grad_norm": 12.25, "learning_rate": 9.868131755184695e-06, "loss": 1.7222, "step": 1360 }, { "epoch": 0.04014311894580679, "grad_norm": 13.3125, "learning_rate": 9.86619247351208e-06, "loss": 1.7887, "step": 1380 }, { "epoch": 0.040724903278354714, "grad_norm": 12.9375, "learning_rate": 9.864253191839465e-06, "loss": 1.7775, "step": 1400 }, { "epoch": 0.041306687610902636, "grad_norm": 12.25, "learning_rate": 9.86231391016685e-06, "loss": 1.724, "step": 1420 }, { "epoch": 0.041888471943450564, "grad_norm": 9.125, "learning_rate": 9.860374628494236e-06, "loss": 1.7322, "step": 1440 }, { "epoch": 0.042470256275998486, "grad_norm": 13.125, "learning_rate": 9.858435346821619e-06, "loss": 1.7101, "step": 1460 }, { "epoch": 0.043052040608546414, "grad_norm": 14.0, "learning_rate": 9.856496065149004e-06, "loss": 1.7176, "step": 1480 }, { "epoch": 0.043633824941094336, "grad_norm": 14.6875, "learning_rate": 9.85455678347639e-06, "loss": 1.6656, "step": 1500 }, { "epoch": 0.04421560927364226, "grad_norm": 12.5, "learning_rate": 9.852617501803774e-06, "loss": 1.7662, "step": 1520 }, { "epoch": 0.044797393606190186, "grad_norm": 13.4375, "learning_rate": 9.85067822013116e-06, "loss": 1.6732, "step": 1540 }, { "epoch": 0.04537917793873811, "grad_norm": 13.0, "learning_rate": 9.848738938458545e-06, "loss": 1.7837, "step": 1560 }, { "epoch": 0.045960962271286036, "grad_norm": 11.8125, "learning_rate": 9.84679965678593e-06, "loss": 1.703, "step": 1580 }, { "epoch": 0.04654274660383396, "grad_norm": 9.3125, "learning_rate": 9.844860375113315e-06, "loss": 1.7084, "step": 1600 }, { "epoch": 0.047124530936381887, "grad_norm": 10.8125, "learning_rate": 9.8429210934407e-06, "loss": 1.795, "step": 1620 }, { "epoch": 0.04770631526892981, "grad_norm": 10.75, "learning_rate": 9.840981811768085e-06, "loss": 1.8042, "step": 1640 }, { "epoch": 0.04828809960147773, "grad_norm": 12.4375, "learning_rate": 9.83904253009547e-06, "loss": 1.6848, "step": 1660 }, { "epoch": 0.04886988393402566, "grad_norm": 14.0, "learning_rate": 9.837103248422855e-06, "loss": 1.6867, "step": 1680 }, { "epoch": 0.04945166826657358, "grad_norm": 13.9375, "learning_rate": 9.83516396675024e-06, "loss": 1.7605, "step": 1700 }, { "epoch": 0.05003345259912151, "grad_norm": 13.4375, "learning_rate": 9.833224685077625e-06, "loss": 1.6898, "step": 1720 }, { "epoch": 0.05061523693166943, "grad_norm": 11.5, "learning_rate": 9.83128540340501e-06, "loss": 1.723, "step": 1740 }, { "epoch": 0.05119702126421735, "grad_norm": 14.5625, "learning_rate": 9.829346121732395e-06, "loss": 1.7505, "step": 1760 }, { "epoch": 0.05177880559676528, "grad_norm": 12.1875, "learning_rate": 9.82740684005978e-06, "loss": 1.7424, "step": 1780 }, { "epoch": 0.0523605899293132, "grad_norm": 13.0625, "learning_rate": 9.825467558387166e-06, "loss": 1.7796, "step": 1800 }, { "epoch": 0.05294237426186113, "grad_norm": 11.3125, "learning_rate": 9.82352827671455e-06, "loss": 1.6866, "step": 1820 }, { "epoch": 0.05352415859440905, "grad_norm": 11.375, "learning_rate": 9.821588995041936e-06, "loss": 1.812, "step": 1840 }, { "epoch": 0.054105942926956974, "grad_norm": 13.625, "learning_rate": 9.819649713369321e-06, "loss": 1.7411, "step": 1860 }, { "epoch": 0.0546877272595049, "grad_norm": 12.5625, "learning_rate": 9.817710431696706e-06, "loss": 1.7386, "step": 1880 }, { "epoch": 0.055269511592052824, "grad_norm": 13.9375, "learning_rate": 9.815771150024091e-06, "loss": 1.7493, "step": 1900 }, { "epoch": 0.05585129592460075, "grad_norm": 16.625, "learning_rate": 9.813831868351476e-06, "loss": 1.7573, "step": 1920 }, { "epoch": 0.056433080257148674, "grad_norm": 15.3125, "learning_rate": 9.811892586678861e-06, "loss": 1.7363, "step": 1940 }, { "epoch": 0.0570148645896966, "grad_norm": 13.75, "learning_rate": 9.809953305006246e-06, "loss": 1.7155, "step": 1960 }, { "epoch": 0.057596648922244524, "grad_norm": 13.5, "learning_rate": 9.808014023333632e-06, "loss": 1.7193, "step": 1980 }, { "epoch": 0.058178433254792446, "grad_norm": 14.5625, "learning_rate": 9.806074741661017e-06, "loss": 1.6673, "step": 2000 }, { "epoch": 0.058760217587340374, "grad_norm": 12.375, "learning_rate": 9.804135459988402e-06, "loss": 1.7952, "step": 2020 }, { "epoch": 0.059342001919888296, "grad_norm": 13.1875, "learning_rate": 9.802196178315787e-06, "loss": 1.7525, "step": 2040 }, { "epoch": 0.059923786252436224, "grad_norm": 12.6875, "learning_rate": 9.800256896643172e-06, "loss": 1.7017, "step": 2060 }, { "epoch": 0.060505570584984146, "grad_norm": 14.0625, "learning_rate": 9.798317614970557e-06, "loss": 1.6701, "step": 2080 }, { "epoch": 0.06108735491753207, "grad_norm": 12.8125, "learning_rate": 9.796378333297942e-06, "loss": 1.7619, "step": 2100 }, { "epoch": 0.061669139250079996, "grad_norm": 12.875, "learning_rate": 9.794439051625327e-06, "loss": 1.6994, "step": 2120 }, { "epoch": 0.06225092358262792, "grad_norm": 13.6875, "learning_rate": 9.792499769952712e-06, "loss": 1.6774, "step": 2140 }, { "epoch": 0.06283270791517584, "grad_norm": 10.6875, "learning_rate": 9.790560488280097e-06, "loss": 1.7265, "step": 2160 }, { "epoch": 0.06341449224772377, "grad_norm": 11.375, "learning_rate": 9.788621206607483e-06, "loss": 1.749, "step": 2180 }, { "epoch": 0.0639962765802717, "grad_norm": 11.0, "learning_rate": 9.786681924934868e-06, "loss": 1.7317, "step": 2200 }, { "epoch": 0.06457806091281962, "grad_norm": 12.4375, "learning_rate": 9.784742643262253e-06, "loss": 1.7559, "step": 2220 }, { "epoch": 0.06515984524536754, "grad_norm": 10.375, "learning_rate": 9.782803361589638e-06, "loss": 1.7727, "step": 2240 }, { "epoch": 0.06574162957791546, "grad_norm": 14.0625, "learning_rate": 9.780864079917023e-06, "loss": 1.6845, "step": 2260 }, { "epoch": 0.0663234139104634, "grad_norm": 12.8125, "learning_rate": 9.778924798244408e-06, "loss": 1.762, "step": 2280 }, { "epoch": 0.06690519824301132, "grad_norm": 11.9375, "learning_rate": 9.776985516571793e-06, "loss": 1.7802, "step": 2300 }, { "epoch": 0.06748698257555924, "grad_norm": 10.125, "learning_rate": 9.775046234899178e-06, "loss": 1.6035, "step": 2320 }, { "epoch": 0.06806876690810716, "grad_norm": 11.4375, "learning_rate": 9.773106953226563e-06, "loss": 1.7954, "step": 2340 }, { "epoch": 0.06865055124065508, "grad_norm": 13.4375, "learning_rate": 9.771167671553948e-06, "loss": 1.7277, "step": 2360 }, { "epoch": 0.06923233557320302, "grad_norm": 14.0, "learning_rate": 9.769228389881334e-06, "loss": 1.7112, "step": 2380 }, { "epoch": 0.06981411990575094, "grad_norm": 11.875, "learning_rate": 9.767289108208719e-06, "loss": 1.7412, "step": 2400 }, { "epoch": 0.07039590423829886, "grad_norm": 14.6875, "learning_rate": 9.765349826536104e-06, "loss": 1.6549, "step": 2420 }, { "epoch": 0.07097768857084678, "grad_norm": 13.125, "learning_rate": 9.763410544863489e-06, "loss": 1.7308, "step": 2440 }, { "epoch": 0.0715594729033947, "grad_norm": 10.25, "learning_rate": 9.761471263190874e-06, "loss": 1.7989, "step": 2460 }, { "epoch": 0.07214125723594264, "grad_norm": 12.8125, "learning_rate": 9.759531981518259e-06, "loss": 1.6674, "step": 2480 }, { "epoch": 0.07272304156849056, "grad_norm": 12.125, "learning_rate": 9.757592699845644e-06, "loss": 1.7343, "step": 2500 }, { "epoch": 0.07330482590103848, "grad_norm": 13.1875, "learning_rate": 9.75565341817303e-06, "loss": 1.7163, "step": 2520 }, { "epoch": 0.0738866102335864, "grad_norm": 13.3125, "learning_rate": 9.753714136500414e-06, "loss": 1.7604, "step": 2540 }, { "epoch": 0.07446839456613433, "grad_norm": 12.9375, "learning_rate": 9.7517748548278e-06, "loss": 1.73, "step": 2560 }, { "epoch": 0.07505017889868226, "grad_norm": 12.625, "learning_rate": 9.749835573155184e-06, "loss": 1.721, "step": 2580 }, { "epoch": 0.07563196323123018, "grad_norm": 11.6875, "learning_rate": 9.74789629148257e-06, "loss": 1.7146, "step": 2600 }, { "epoch": 0.0762137475637781, "grad_norm": 15.3125, "learning_rate": 9.745957009809955e-06, "loss": 1.7791, "step": 2620 }, { "epoch": 0.07679553189632603, "grad_norm": 16.0, "learning_rate": 9.74401772813734e-06, "loss": 1.7371, "step": 2640 }, { "epoch": 0.07737731622887396, "grad_norm": 11.5625, "learning_rate": 9.742078446464725e-06, "loss": 1.7157, "step": 2660 }, { "epoch": 0.07795910056142188, "grad_norm": 12.375, "learning_rate": 9.74013916479211e-06, "loss": 1.6902, "step": 2680 }, { "epoch": 0.0785408848939698, "grad_norm": 13.1875, "learning_rate": 9.738199883119495e-06, "loss": 1.7734, "step": 2700 }, { "epoch": 0.07912266922651773, "grad_norm": 13.5625, "learning_rate": 9.73626060144688e-06, "loss": 1.7238, "step": 2720 }, { "epoch": 0.07970445355906565, "grad_norm": 10.9375, "learning_rate": 9.734321319774265e-06, "loss": 1.7213, "step": 2740 }, { "epoch": 0.08028623789161358, "grad_norm": 12.5625, "learning_rate": 9.73238203810165e-06, "loss": 1.647, "step": 2760 }, { "epoch": 0.0808680222241615, "grad_norm": 14.875, "learning_rate": 9.730442756429035e-06, "loss": 1.6843, "step": 2780 }, { "epoch": 0.08144980655670943, "grad_norm": 13.0625, "learning_rate": 9.72850347475642e-06, "loss": 1.734, "step": 2800 }, { "epoch": 0.08203159088925735, "grad_norm": 11.0, "learning_rate": 9.726564193083806e-06, "loss": 1.7499, "step": 2820 }, { "epoch": 0.08261337522180527, "grad_norm": 16.75, "learning_rate": 9.72462491141119e-06, "loss": 1.7646, "step": 2840 }, { "epoch": 0.0831951595543532, "grad_norm": 11.8125, "learning_rate": 9.722685629738576e-06, "loss": 1.7599, "step": 2860 }, { "epoch": 0.08377694388690113, "grad_norm": 11.5, "learning_rate": 9.720746348065961e-06, "loss": 1.7405, "step": 2880 }, { "epoch": 0.08435872821944905, "grad_norm": 11.375, "learning_rate": 9.718807066393346e-06, "loss": 1.6979, "step": 2900 }, { "epoch": 0.08494051255199697, "grad_norm": 11.8125, "learning_rate": 9.716867784720731e-06, "loss": 1.7044, "step": 2920 }, { "epoch": 0.0855222968845449, "grad_norm": 12.25, "learning_rate": 9.714928503048116e-06, "loss": 1.7057, "step": 2940 }, { "epoch": 0.08610408121709283, "grad_norm": 12.3125, "learning_rate": 9.712989221375501e-06, "loss": 1.6687, "step": 2960 }, { "epoch": 0.08668586554964075, "grad_norm": 12.5625, "learning_rate": 9.711049939702886e-06, "loss": 1.7973, "step": 2980 }, { "epoch": 0.08726764988218867, "grad_norm": 11.4375, "learning_rate": 9.709110658030272e-06, "loss": 1.7279, "step": 3000 }, { "epoch": 0.0878494342147366, "grad_norm": 12.4375, "learning_rate": 9.707171376357657e-06, "loss": 1.6971, "step": 3020 }, { "epoch": 0.08843121854728452, "grad_norm": 11.125, "learning_rate": 9.705232094685042e-06, "loss": 1.679, "step": 3040 }, { "epoch": 0.08901300287983245, "grad_norm": 14.875, "learning_rate": 9.703292813012427e-06, "loss": 1.699, "step": 3060 }, { "epoch": 0.08959478721238037, "grad_norm": 11.875, "learning_rate": 9.70135353133981e-06, "loss": 1.803, "step": 3080 }, { "epoch": 0.0901765715449283, "grad_norm": 13.75, "learning_rate": 9.699414249667195e-06, "loss": 1.6699, "step": 3100 }, { "epoch": 0.09075835587747622, "grad_norm": 11.9375, "learning_rate": 9.69747496799458e-06, "loss": 1.7585, "step": 3120 }, { "epoch": 0.09134014021002414, "grad_norm": 15.1875, "learning_rate": 9.695535686321966e-06, "loss": 1.7541, "step": 3140 }, { "epoch": 0.09192192454257207, "grad_norm": 14.125, "learning_rate": 9.69359640464935e-06, "loss": 1.7145, "step": 3160 }, { "epoch": 0.09250370887512, "grad_norm": 12.9375, "learning_rate": 9.691657122976736e-06, "loss": 1.7008, "step": 3180 }, { "epoch": 0.09308549320766792, "grad_norm": 11.8125, "learning_rate": 9.68971784130412e-06, "loss": 1.6591, "step": 3200 }, { "epoch": 0.09366727754021584, "grad_norm": 11.9375, "learning_rate": 9.687778559631506e-06, "loss": 1.6305, "step": 3220 }, { "epoch": 0.09424906187276377, "grad_norm": 12.0625, "learning_rate": 9.685839277958891e-06, "loss": 1.7425, "step": 3240 }, { "epoch": 0.0948308462053117, "grad_norm": 12.3125, "learning_rate": 9.683899996286276e-06, "loss": 1.7959, "step": 3260 }, { "epoch": 0.09541263053785962, "grad_norm": 12.875, "learning_rate": 9.681960714613661e-06, "loss": 1.765, "step": 3280 }, { "epoch": 0.09599441487040754, "grad_norm": 14.0625, "learning_rate": 9.680021432941046e-06, "loss": 1.7248, "step": 3300 }, { "epoch": 0.09657619920295546, "grad_norm": 14.875, "learning_rate": 9.678082151268431e-06, "loss": 1.6914, "step": 3320 }, { "epoch": 0.0971579835355034, "grad_norm": 12.4375, "learning_rate": 9.676142869595816e-06, "loss": 1.7039, "step": 3340 }, { "epoch": 0.09773976786805132, "grad_norm": 11.8125, "learning_rate": 9.674203587923202e-06, "loss": 1.5978, "step": 3360 }, { "epoch": 0.09832155220059924, "grad_norm": 12.625, "learning_rate": 9.672264306250587e-06, "loss": 1.6436, "step": 3380 }, { "epoch": 0.09890333653314716, "grad_norm": 10.0625, "learning_rate": 9.670325024577972e-06, "loss": 1.7001, "step": 3400 }, { "epoch": 0.09948512086569508, "grad_norm": 12.3125, "learning_rate": 9.668385742905357e-06, "loss": 1.7009, "step": 3420 }, { "epoch": 0.10006690519824302, "grad_norm": 13.1875, "learning_rate": 9.666446461232742e-06, "loss": 1.7319, "step": 3440 }, { "epoch": 0.10064868953079094, "grad_norm": 17.75, "learning_rate": 9.664507179560127e-06, "loss": 1.7174, "step": 3460 }, { "epoch": 0.10123047386333886, "grad_norm": 10.5, "learning_rate": 9.662567897887512e-06, "loss": 1.7301, "step": 3480 }, { "epoch": 0.10181225819588678, "grad_norm": 10.75, "learning_rate": 9.660628616214897e-06, "loss": 1.6914, "step": 3500 }, { "epoch": 0.1023940425284347, "grad_norm": 12.75, "learning_rate": 9.658689334542282e-06, "loss": 1.6277, "step": 3520 }, { "epoch": 0.10297582686098264, "grad_norm": 14.4375, "learning_rate": 9.656750052869667e-06, "loss": 1.6839, "step": 3540 }, { "epoch": 0.10355761119353056, "grad_norm": 12.5625, "learning_rate": 9.654810771197053e-06, "loss": 1.7156, "step": 3560 }, { "epoch": 0.10413939552607848, "grad_norm": 13.6875, "learning_rate": 9.652871489524438e-06, "loss": 1.6836, "step": 3580 }, { "epoch": 0.1047211798586264, "grad_norm": 13.3125, "learning_rate": 9.650932207851823e-06, "loss": 1.7005, "step": 3600 }, { "epoch": 0.10530296419117433, "grad_norm": 10.6875, "learning_rate": 9.648992926179208e-06, "loss": 1.7016, "step": 3620 }, { "epoch": 0.10588474852372226, "grad_norm": 13.4375, "learning_rate": 9.647053644506591e-06, "loss": 1.6735, "step": 3640 }, { "epoch": 0.10646653285627018, "grad_norm": 13.75, "learning_rate": 9.645114362833976e-06, "loss": 1.7366, "step": 3660 }, { "epoch": 0.1070483171888181, "grad_norm": 12.75, "learning_rate": 9.643175081161361e-06, "loss": 1.6118, "step": 3680 }, { "epoch": 0.10763010152136603, "grad_norm": 14.75, "learning_rate": 9.641235799488747e-06, "loss": 1.7359, "step": 3700 }, { "epoch": 0.10821188585391395, "grad_norm": 13.0625, "learning_rate": 9.639296517816132e-06, "loss": 1.687, "step": 3720 }, { "epoch": 0.10879367018646188, "grad_norm": 12.75, "learning_rate": 9.637357236143517e-06, "loss": 1.6533, "step": 3740 }, { "epoch": 0.1093754545190098, "grad_norm": 13.9375, "learning_rate": 9.635417954470902e-06, "loss": 1.742, "step": 3760 }, { "epoch": 0.10995723885155773, "grad_norm": 10.8125, "learning_rate": 9.633478672798287e-06, "loss": 1.6269, "step": 3780 }, { "epoch": 0.11053902318410565, "grad_norm": 11.625, "learning_rate": 9.631539391125672e-06, "loss": 1.7434, "step": 3800 }, { "epoch": 0.11112080751665358, "grad_norm": 12.1875, "learning_rate": 9.629600109453057e-06, "loss": 1.6067, "step": 3820 }, { "epoch": 0.1117025918492015, "grad_norm": 14.375, "learning_rate": 9.627660827780442e-06, "loss": 1.6903, "step": 3840 }, { "epoch": 0.11228437618174943, "grad_norm": 13.9375, "learning_rate": 9.625721546107827e-06, "loss": 1.6103, "step": 3860 }, { "epoch": 0.11286616051429735, "grad_norm": 13.1875, "learning_rate": 9.623782264435212e-06, "loss": 1.7284, "step": 3880 }, { "epoch": 0.11344794484684527, "grad_norm": 14.375, "learning_rate": 9.621842982762598e-06, "loss": 1.6879, "step": 3900 }, { "epoch": 0.1140297291793932, "grad_norm": 13.9375, "learning_rate": 9.619903701089983e-06, "loss": 1.7217, "step": 3920 }, { "epoch": 0.11461151351194113, "grad_norm": 10.125, "learning_rate": 9.617964419417368e-06, "loss": 1.6572, "step": 3940 }, { "epoch": 0.11519329784448905, "grad_norm": 12.6875, "learning_rate": 9.616025137744753e-06, "loss": 1.7197, "step": 3960 }, { "epoch": 0.11577508217703697, "grad_norm": 11.8125, "learning_rate": 9.614085856072138e-06, "loss": 1.7059, "step": 3980 }, { "epoch": 0.11635686650958489, "grad_norm": 10.4375, "learning_rate": 9.612146574399523e-06, "loss": 1.6629, "step": 4000 }, { "epoch": 0.11693865084213283, "grad_norm": 15.75, "learning_rate": 9.610207292726908e-06, "loss": 1.5862, "step": 4020 }, { "epoch": 0.11752043517468075, "grad_norm": 12.9375, "learning_rate": 9.608268011054293e-06, "loss": 1.7666, "step": 4040 }, { "epoch": 0.11810221950722867, "grad_norm": 11.1875, "learning_rate": 9.606328729381678e-06, "loss": 1.7658, "step": 4060 }, { "epoch": 0.11868400383977659, "grad_norm": 12.1875, "learning_rate": 9.604389447709063e-06, "loss": 1.6754, "step": 4080 }, { "epoch": 0.11926578817232451, "grad_norm": 12.625, "learning_rate": 9.602450166036448e-06, "loss": 1.7163, "step": 4100 }, { "epoch": 0.11984757250487245, "grad_norm": 12.0, "learning_rate": 9.600510884363834e-06, "loss": 1.7055, "step": 4120 }, { "epoch": 0.12042935683742037, "grad_norm": 13.8125, "learning_rate": 9.598571602691219e-06, "loss": 1.6927, "step": 4140 }, { "epoch": 0.12101114116996829, "grad_norm": 11.375, "learning_rate": 9.596632321018604e-06, "loss": 1.6708, "step": 4160 }, { "epoch": 0.12159292550251621, "grad_norm": 12.3125, "learning_rate": 9.594693039345989e-06, "loss": 1.6681, "step": 4180 }, { "epoch": 0.12217470983506414, "grad_norm": 13.125, "learning_rate": 9.592753757673374e-06, "loss": 1.6725, "step": 4200 }, { "epoch": 0.12275649416761207, "grad_norm": 15.0625, "learning_rate": 9.590814476000759e-06, "loss": 1.6429, "step": 4220 }, { "epoch": 0.12333827850015999, "grad_norm": 12.625, "learning_rate": 9.588875194328144e-06, "loss": 1.67, "step": 4240 }, { "epoch": 0.12392006283270791, "grad_norm": 10.9375, "learning_rate": 9.58693591265553e-06, "loss": 1.6725, "step": 4260 }, { "epoch": 0.12450184716525584, "grad_norm": 12.1875, "learning_rate": 9.584996630982914e-06, "loss": 1.6875, "step": 4280 }, { "epoch": 0.12508363149780377, "grad_norm": 12.125, "learning_rate": 9.5830573493103e-06, "loss": 1.6594, "step": 4300 }, { "epoch": 0.12566541583035168, "grad_norm": 12.6875, "learning_rate": 9.581118067637685e-06, "loss": 1.6841, "step": 4320 }, { "epoch": 0.12624720016289961, "grad_norm": 12.75, "learning_rate": 9.57917878596507e-06, "loss": 1.6427, "step": 4340 }, { "epoch": 0.12682898449544755, "grad_norm": 13.625, "learning_rate": 9.577239504292455e-06, "loss": 1.6997, "step": 4360 }, { "epoch": 0.12741076882799546, "grad_norm": 14.75, "learning_rate": 9.57530022261984e-06, "loss": 1.732, "step": 4380 }, { "epoch": 0.1279925531605434, "grad_norm": 15.125, "learning_rate": 9.573360940947225e-06, "loss": 1.8135, "step": 4400 }, { "epoch": 0.1285743374930913, "grad_norm": 11.75, "learning_rate": 9.57142165927461e-06, "loss": 1.6542, "step": 4420 }, { "epoch": 0.12915612182563924, "grad_norm": 11.5, "learning_rate": 9.569482377601995e-06, "loss": 1.6981, "step": 4440 }, { "epoch": 0.12973790615818717, "grad_norm": 11.5, "learning_rate": 9.56754309592938e-06, "loss": 1.6306, "step": 4460 }, { "epoch": 0.13031969049073508, "grad_norm": 10.75, "learning_rate": 9.565603814256765e-06, "loss": 1.6223, "step": 4480 }, { "epoch": 0.13090147482328301, "grad_norm": 13.1875, "learning_rate": 9.56366453258415e-06, "loss": 1.6891, "step": 4500 }, { "epoch": 0.13148325915583092, "grad_norm": 17.125, "learning_rate": 9.561725250911536e-06, "loss": 1.691, "step": 4520 }, { "epoch": 0.13206504348837886, "grad_norm": 13.6875, "learning_rate": 9.55978596923892e-06, "loss": 1.5271, "step": 4540 }, { "epoch": 0.1326468278209268, "grad_norm": 11.375, "learning_rate": 9.557846687566306e-06, "loss": 1.6127, "step": 4560 }, { "epoch": 0.1332286121534747, "grad_norm": 15.1875, "learning_rate": 9.55590740589369e-06, "loss": 1.7001, "step": 4580 }, { "epoch": 0.13381039648602264, "grad_norm": 15.4375, "learning_rate": 9.553968124221076e-06, "loss": 1.6584, "step": 4600 }, { "epoch": 0.13439218081857054, "grad_norm": 11.8125, "learning_rate": 9.552028842548461e-06, "loss": 1.6233, "step": 4620 }, { "epoch": 0.13497396515111848, "grad_norm": 15.625, "learning_rate": 9.550089560875846e-06, "loss": 1.7007, "step": 4640 }, { "epoch": 0.13555574948366642, "grad_norm": 12.9375, "learning_rate": 9.548150279203231e-06, "loss": 1.7278, "step": 4660 }, { "epoch": 0.13613753381621432, "grad_norm": 12.6875, "learning_rate": 9.546210997530616e-06, "loss": 1.7739, "step": 4680 }, { "epoch": 0.13671931814876226, "grad_norm": 13.4375, "learning_rate": 9.544271715858001e-06, "loss": 1.6601, "step": 4700 }, { "epoch": 0.13730110248131017, "grad_norm": 15.0625, "learning_rate": 9.542332434185387e-06, "loss": 1.6969, "step": 4720 }, { "epoch": 0.1378828868138581, "grad_norm": 14.4375, "learning_rate": 9.540393152512772e-06, "loss": 1.6679, "step": 4740 }, { "epoch": 0.13846467114640604, "grad_norm": 12.375, "learning_rate": 9.538453870840157e-06, "loss": 1.6858, "step": 4760 }, { "epoch": 0.13904645547895395, "grad_norm": 11.3125, "learning_rate": 9.536514589167542e-06, "loss": 1.6907, "step": 4780 }, { "epoch": 0.13962823981150188, "grad_norm": 11.8125, "learning_rate": 9.534575307494927e-06, "loss": 1.6233, "step": 4800 }, { "epoch": 0.1402100241440498, "grad_norm": 12.75, "learning_rate": 9.532636025822312e-06, "loss": 1.689, "step": 4820 }, { "epoch": 0.14079180847659772, "grad_norm": 13.1875, "learning_rate": 9.530696744149697e-06, "loss": 1.6551, "step": 4840 }, { "epoch": 0.14137359280914566, "grad_norm": 13.8125, "learning_rate": 9.528757462477082e-06, "loss": 1.6108, "step": 4860 }, { "epoch": 0.14195537714169357, "grad_norm": 11.625, "learning_rate": 9.526818180804467e-06, "loss": 1.7142, "step": 4880 }, { "epoch": 0.1425371614742415, "grad_norm": 12.75, "learning_rate": 9.524878899131852e-06, "loss": 1.6855, "step": 4900 }, { "epoch": 0.1431189458067894, "grad_norm": 10.0, "learning_rate": 9.522939617459237e-06, "loss": 1.7138, "step": 4920 }, { "epoch": 0.14370073013933735, "grad_norm": 11.75, "learning_rate": 9.521000335786623e-06, "loss": 1.7594, "step": 4940 }, { "epoch": 0.14428251447188528, "grad_norm": 12.0625, "learning_rate": 9.519061054114008e-06, "loss": 1.6585, "step": 4960 }, { "epoch": 0.1448642988044332, "grad_norm": 14.5625, "learning_rate": 9.517121772441393e-06, "loss": 1.7204, "step": 4980 }, { "epoch": 0.14544608313698112, "grad_norm": 12.5, "learning_rate": 9.515182490768778e-06, "loss": 1.6766, "step": 5000 }, { "epoch": 0.14602786746952903, "grad_norm": 13.125, "learning_rate": 9.513243209096163e-06, "loss": 1.7624, "step": 5020 }, { "epoch": 0.14660965180207697, "grad_norm": 12.1875, "learning_rate": 9.511303927423548e-06, "loss": 1.7071, "step": 5040 }, { "epoch": 0.1471914361346249, "grad_norm": 12.8125, "learning_rate": 9.509364645750933e-06, "loss": 1.7172, "step": 5060 }, { "epoch": 0.1477732204671728, "grad_norm": 10.9375, "learning_rate": 9.507425364078318e-06, "loss": 1.7156, "step": 5080 }, { "epoch": 0.14835500479972075, "grad_norm": 12.9375, "learning_rate": 9.505486082405703e-06, "loss": 1.6806, "step": 5100 }, { "epoch": 0.14893678913226865, "grad_norm": 13.0, "learning_rate": 9.503546800733088e-06, "loss": 1.652, "step": 5120 }, { "epoch": 0.1495185734648166, "grad_norm": 14.0625, "learning_rate": 9.501607519060474e-06, "loss": 1.6509, "step": 5140 }, { "epoch": 0.15010035779736453, "grad_norm": 12.0, "learning_rate": 9.499668237387859e-06, "loss": 1.7384, "step": 5160 }, { "epoch": 0.15068214212991243, "grad_norm": 13.25, "learning_rate": 9.497728955715244e-06, "loss": 1.6139, "step": 5180 }, { "epoch": 0.15126392646246037, "grad_norm": 13.3125, "learning_rate": 9.495789674042629e-06, "loss": 1.708, "step": 5200 }, { "epoch": 0.1518457107950083, "grad_norm": 15.6875, "learning_rate": 9.493850392370014e-06, "loss": 1.6423, "step": 5220 }, { "epoch": 0.1524274951275562, "grad_norm": 12.9375, "learning_rate": 9.491911110697399e-06, "loss": 1.7107, "step": 5240 }, { "epoch": 0.15300927946010415, "grad_norm": 12.9375, "learning_rate": 9.489971829024784e-06, "loss": 1.6481, "step": 5260 }, { "epoch": 0.15359106379265205, "grad_norm": 12.875, "learning_rate": 9.488032547352168e-06, "loss": 1.5836, "step": 5280 }, { "epoch": 0.1541728481252, "grad_norm": 10.6875, "learning_rate": 9.486093265679553e-06, "loss": 1.6645, "step": 5300 }, { "epoch": 0.15475463245774793, "grad_norm": 11.875, "learning_rate": 9.484153984006938e-06, "loss": 1.7185, "step": 5320 }, { "epoch": 0.15533641679029583, "grad_norm": 13.0625, "learning_rate": 9.482214702334323e-06, "loss": 1.5641, "step": 5340 }, { "epoch": 0.15591820112284377, "grad_norm": 12.875, "learning_rate": 9.480275420661708e-06, "loss": 1.7268, "step": 5360 }, { "epoch": 0.15649998545539168, "grad_norm": 9.9375, "learning_rate": 9.478336138989093e-06, "loss": 1.6341, "step": 5380 }, { "epoch": 0.1570817697879396, "grad_norm": 15.8125, "learning_rate": 9.476396857316478e-06, "loss": 1.7263, "step": 5400 }, { "epoch": 0.15766355412048755, "grad_norm": 12.9375, "learning_rate": 9.474457575643863e-06, "loss": 1.7495, "step": 5420 }, { "epoch": 0.15824533845303546, "grad_norm": 14.9375, "learning_rate": 9.472518293971248e-06, "loss": 1.7751, "step": 5440 }, { "epoch": 0.1588271227855834, "grad_norm": 12.375, "learning_rate": 9.470579012298633e-06, "loss": 1.6839, "step": 5460 }, { "epoch": 0.1594089071181313, "grad_norm": 13.0, "learning_rate": 9.468639730626019e-06, "loss": 1.6549, "step": 5480 }, { "epoch": 0.15999069145067923, "grad_norm": 12.125, "learning_rate": 9.466700448953404e-06, "loss": 1.6218, "step": 5500 }, { "epoch": 0.16057247578322717, "grad_norm": 10.125, "learning_rate": 9.464761167280789e-06, "loss": 1.7006, "step": 5520 }, { "epoch": 0.16115426011577508, "grad_norm": 14.25, "learning_rate": 9.462821885608174e-06, "loss": 1.7311, "step": 5540 }, { "epoch": 0.161736044448323, "grad_norm": 11.125, "learning_rate": 9.460882603935559e-06, "loss": 1.6975, "step": 5560 }, { "epoch": 0.16231782878087092, "grad_norm": 10.0625, "learning_rate": 9.458943322262944e-06, "loss": 1.6798, "step": 5580 }, { "epoch": 0.16289961311341886, "grad_norm": 11.5625, "learning_rate": 9.457004040590329e-06, "loss": 1.7398, "step": 5600 }, { "epoch": 0.1634813974459668, "grad_norm": 12.125, "learning_rate": 9.455064758917714e-06, "loss": 1.6346, "step": 5620 }, { "epoch": 0.1640631817785147, "grad_norm": 16.375, "learning_rate": 9.4531254772451e-06, "loss": 1.6982, "step": 5640 }, { "epoch": 0.16464496611106264, "grad_norm": 13.625, "learning_rate": 9.451186195572484e-06, "loss": 1.6089, "step": 5660 }, { "epoch": 0.16522675044361054, "grad_norm": 12.125, "learning_rate": 9.44924691389987e-06, "loss": 1.5636, "step": 5680 }, { "epoch": 0.16580853477615848, "grad_norm": 11.75, "learning_rate": 9.447307632227255e-06, "loss": 1.6292, "step": 5700 }, { "epoch": 0.1663903191087064, "grad_norm": 11.8125, "learning_rate": 9.44536835055464e-06, "loss": 1.6882, "step": 5720 }, { "epoch": 0.16697210344125432, "grad_norm": 12.125, "learning_rate": 9.443429068882025e-06, "loss": 1.5953, "step": 5740 }, { "epoch": 0.16755388777380226, "grad_norm": 11.0, "learning_rate": 9.44148978720941e-06, "loss": 1.6087, "step": 5760 }, { "epoch": 0.16813567210635016, "grad_norm": 12.5, "learning_rate": 9.439550505536795e-06, "loss": 1.6753, "step": 5780 }, { "epoch": 0.1687174564388981, "grad_norm": 12.875, "learning_rate": 9.43761122386418e-06, "loss": 1.6255, "step": 5800 }, { "epoch": 0.16929924077144604, "grad_norm": 13.0625, "learning_rate": 9.435671942191565e-06, "loss": 1.6765, "step": 5820 }, { "epoch": 0.16988102510399394, "grad_norm": 9.6875, "learning_rate": 9.43373266051895e-06, "loss": 1.7139, "step": 5840 }, { "epoch": 0.17046280943654188, "grad_norm": 10.6875, "learning_rate": 9.431793378846335e-06, "loss": 1.7313, "step": 5860 }, { "epoch": 0.1710445937690898, "grad_norm": 12.8125, "learning_rate": 9.42985409717372e-06, "loss": 1.6744, "step": 5880 }, { "epoch": 0.17162637810163772, "grad_norm": 11.875, "learning_rate": 9.427914815501106e-06, "loss": 1.6938, "step": 5900 }, { "epoch": 0.17220816243418566, "grad_norm": 13.125, "learning_rate": 9.42597553382849e-06, "loss": 1.7012, "step": 5920 }, { "epoch": 0.17278994676673357, "grad_norm": 10.625, "learning_rate": 9.424036252155876e-06, "loss": 1.6517, "step": 5940 }, { "epoch": 0.1733717310992815, "grad_norm": 13.5625, "learning_rate": 9.422096970483261e-06, "loss": 1.7294, "step": 5960 }, { "epoch": 0.1739535154318294, "grad_norm": 9.25, "learning_rate": 9.420157688810646e-06, "loss": 1.761, "step": 5980 }, { "epoch": 0.17453529976437734, "grad_norm": 12.9375, "learning_rate": 9.418218407138031e-06, "loss": 1.7016, "step": 6000 }, { "epoch": 0.17511708409692528, "grad_norm": 13.875, "learning_rate": 9.416279125465416e-06, "loss": 1.6872, "step": 6020 }, { "epoch": 0.1756988684294732, "grad_norm": 12.0625, "learning_rate": 9.414339843792801e-06, "loss": 1.6448, "step": 6040 }, { "epoch": 0.17628065276202112, "grad_norm": 14.6875, "learning_rate": 9.412400562120186e-06, "loss": 1.7946, "step": 6060 }, { "epoch": 0.17686243709456903, "grad_norm": 11.875, "learning_rate": 9.410461280447571e-06, "loss": 1.5854, "step": 6080 }, { "epoch": 0.17744422142711697, "grad_norm": 15.75, "learning_rate": 9.408521998774957e-06, "loss": 1.6513, "step": 6100 }, { "epoch": 0.1780260057596649, "grad_norm": 12.0625, "learning_rate": 9.406582717102342e-06, "loss": 1.6085, "step": 6120 }, { "epoch": 0.1786077900922128, "grad_norm": 14.25, "learning_rate": 9.404643435429727e-06, "loss": 1.7134, "step": 6140 }, { "epoch": 0.17918957442476074, "grad_norm": 9.25, "learning_rate": 9.402704153757112e-06, "loss": 1.6351, "step": 6160 }, { "epoch": 0.17977135875730865, "grad_norm": 12.375, "learning_rate": 9.400764872084497e-06, "loss": 1.6563, "step": 6180 }, { "epoch": 0.1803531430898566, "grad_norm": 15.125, "learning_rate": 9.398825590411882e-06, "loss": 1.6515, "step": 6200 }, { "epoch": 0.18093492742240452, "grad_norm": 12.8125, "learning_rate": 9.396886308739267e-06, "loss": 1.593, "step": 6220 }, { "epoch": 0.18151671175495243, "grad_norm": 11.0625, "learning_rate": 9.394947027066652e-06, "loss": 1.6267, "step": 6240 }, { "epoch": 0.18209849608750037, "grad_norm": 13.875, "learning_rate": 9.393007745394037e-06, "loss": 1.6304, "step": 6260 }, { "epoch": 0.18268028042004827, "grad_norm": 11.375, "learning_rate": 9.391068463721422e-06, "loss": 1.7136, "step": 6280 }, { "epoch": 0.1832620647525962, "grad_norm": 12.8125, "learning_rate": 9.389129182048808e-06, "loss": 1.7938, "step": 6300 }, { "epoch": 0.18384384908514415, "grad_norm": 11.625, "learning_rate": 9.387189900376193e-06, "loss": 1.7028, "step": 6320 }, { "epoch": 0.18442563341769205, "grad_norm": 11.25, "learning_rate": 9.385250618703578e-06, "loss": 1.6818, "step": 6340 }, { "epoch": 0.18500741775024, "grad_norm": 12.25, "learning_rate": 9.383311337030963e-06, "loss": 1.668, "step": 6360 }, { "epoch": 0.18558920208278792, "grad_norm": 19.125, "learning_rate": 9.381372055358348e-06, "loss": 1.6622, "step": 6380 }, { "epoch": 0.18617098641533583, "grad_norm": 12.125, "learning_rate": 9.379432773685733e-06, "loss": 1.6649, "step": 6400 }, { "epoch": 0.18675277074788377, "grad_norm": 12.3125, "learning_rate": 9.377493492013118e-06, "loss": 1.6025, "step": 6420 }, { "epoch": 0.18733455508043168, "grad_norm": 11.4375, "learning_rate": 9.375554210340503e-06, "loss": 1.7254, "step": 6440 }, { "epoch": 0.1879163394129796, "grad_norm": 13.9375, "learning_rate": 9.373614928667888e-06, "loss": 1.6749, "step": 6460 }, { "epoch": 0.18849812374552755, "grad_norm": 16.375, "learning_rate": 9.371675646995273e-06, "loss": 1.7615, "step": 6480 }, { "epoch": 0.18907990807807545, "grad_norm": 11.1875, "learning_rate": 9.369736365322659e-06, "loss": 1.582, "step": 6500 }, { "epoch": 0.1896616924106234, "grad_norm": 14.4375, "learning_rate": 9.367797083650044e-06, "loss": 1.6986, "step": 6520 }, { "epoch": 0.1902434767431713, "grad_norm": 15.25, "learning_rate": 9.365857801977429e-06, "loss": 1.6597, "step": 6540 }, { "epoch": 0.19082526107571923, "grad_norm": 12.5, "learning_rate": 9.363918520304814e-06, "loss": 1.6914, "step": 6560 }, { "epoch": 0.19140704540826717, "grad_norm": 13.8125, "learning_rate": 9.361979238632199e-06, "loss": 1.6375, "step": 6580 }, { "epoch": 0.19198882974081508, "grad_norm": 12.125, "learning_rate": 9.360039956959584e-06, "loss": 1.6513, "step": 6600 }, { "epoch": 0.192570614073363, "grad_norm": 13.625, "learning_rate": 9.358100675286969e-06, "loss": 1.6101, "step": 6620 }, { "epoch": 0.19315239840591092, "grad_norm": 10.875, "learning_rate": 9.356161393614354e-06, "loss": 1.686, "step": 6640 }, { "epoch": 0.19373418273845885, "grad_norm": 11.4375, "learning_rate": 9.35422211194174e-06, "loss": 1.7196, "step": 6660 }, { "epoch": 0.1943159670710068, "grad_norm": 12.5, "learning_rate": 9.352282830269124e-06, "loss": 1.6264, "step": 6680 }, { "epoch": 0.1948977514035547, "grad_norm": 12.375, "learning_rate": 9.35034354859651e-06, "loss": 1.6006, "step": 6700 }, { "epoch": 0.19547953573610263, "grad_norm": 13.0625, "learning_rate": 9.348404266923895e-06, "loss": 1.6414, "step": 6720 }, { "epoch": 0.19606132006865054, "grad_norm": 12.3125, "learning_rate": 9.34646498525128e-06, "loss": 1.622, "step": 6740 }, { "epoch": 0.19664310440119848, "grad_norm": 15.9375, "learning_rate": 9.344525703578665e-06, "loss": 1.6411, "step": 6760 }, { "epoch": 0.1972248887337464, "grad_norm": 13.0625, "learning_rate": 9.34258642190605e-06, "loss": 1.6437, "step": 6780 }, { "epoch": 0.19780667306629432, "grad_norm": 14.75, "learning_rate": 9.340647140233435e-06, "loss": 1.7252, "step": 6800 }, { "epoch": 0.19838845739884226, "grad_norm": 14.25, "learning_rate": 9.33870785856082e-06, "loss": 1.6502, "step": 6820 }, { "epoch": 0.19897024173139016, "grad_norm": 13.0, "learning_rate": 9.336768576888205e-06, "loss": 1.7225, "step": 6840 }, { "epoch": 0.1995520260639381, "grad_norm": 17.0, "learning_rate": 9.33482929521559e-06, "loss": 1.6959, "step": 6860 }, { "epoch": 0.20013381039648603, "grad_norm": 12.1875, "learning_rate": 9.332890013542975e-06, "loss": 1.6273, "step": 6880 }, { "epoch": 0.20071559472903394, "grad_norm": 11.875, "learning_rate": 9.330950731870359e-06, "loss": 1.7678, "step": 6900 }, { "epoch": 0.20129737906158188, "grad_norm": 10.375, "learning_rate": 9.329011450197744e-06, "loss": 1.7142, "step": 6920 }, { "epoch": 0.20187916339412978, "grad_norm": 13.375, "learning_rate": 9.327072168525129e-06, "loss": 1.6364, "step": 6940 }, { "epoch": 0.20246094772667772, "grad_norm": 14.25, "learning_rate": 9.325132886852514e-06, "loss": 1.7133, "step": 6960 }, { "epoch": 0.20304273205922566, "grad_norm": 13.375, "learning_rate": 9.323193605179899e-06, "loss": 1.7386, "step": 6980 }, { "epoch": 0.20362451639177356, "grad_norm": 10.625, "learning_rate": 9.321254323507284e-06, "loss": 1.6479, "step": 7000 }, { "epoch": 0.2042063007243215, "grad_norm": 12.8125, "learning_rate": 9.31931504183467e-06, "loss": 1.7419, "step": 7020 }, { "epoch": 0.2047880850568694, "grad_norm": 14.5625, "learning_rate": 9.317375760162054e-06, "loss": 1.6332, "step": 7040 }, { "epoch": 0.20536986938941734, "grad_norm": 15.3125, "learning_rate": 9.31543647848944e-06, "loss": 1.7959, "step": 7060 }, { "epoch": 0.20595165372196528, "grad_norm": 13.9375, "learning_rate": 9.313497196816825e-06, "loss": 1.5956, "step": 7080 }, { "epoch": 0.20653343805451319, "grad_norm": 9.4375, "learning_rate": 9.31155791514421e-06, "loss": 1.5998, "step": 7100 }, { "epoch": 0.20711522238706112, "grad_norm": 12.5625, "learning_rate": 9.309618633471595e-06, "loss": 1.6106, "step": 7120 }, { "epoch": 0.20769700671960903, "grad_norm": 11.375, "learning_rate": 9.30767935179898e-06, "loss": 1.6298, "step": 7140 }, { "epoch": 0.20827879105215696, "grad_norm": 12.75, "learning_rate": 9.305740070126365e-06, "loss": 1.7399, "step": 7160 }, { "epoch": 0.2088605753847049, "grad_norm": 14.3125, "learning_rate": 9.30380078845375e-06, "loss": 1.649, "step": 7180 }, { "epoch": 0.2094423597172528, "grad_norm": 9.1875, "learning_rate": 9.301861506781135e-06, "loss": 1.6379, "step": 7200 }, { "epoch": 0.21002414404980074, "grad_norm": 11.0, "learning_rate": 9.29992222510852e-06, "loss": 1.7574, "step": 7220 }, { "epoch": 0.21060592838234865, "grad_norm": 10.9375, "learning_rate": 9.297982943435905e-06, "loss": 1.7173, "step": 7240 }, { "epoch": 0.21118771271489659, "grad_norm": 13.0625, "learning_rate": 9.29604366176329e-06, "loss": 1.6358, "step": 7260 }, { "epoch": 0.21176949704744452, "grad_norm": 11.375, "learning_rate": 9.294104380090676e-06, "loss": 1.677, "step": 7280 }, { "epoch": 0.21235128137999243, "grad_norm": 12.8125, "learning_rate": 9.29216509841806e-06, "loss": 1.6471, "step": 7300 }, { "epoch": 0.21293306571254036, "grad_norm": 11.625, "learning_rate": 9.290225816745446e-06, "loss": 1.7079, "step": 7320 }, { "epoch": 0.21351485004508827, "grad_norm": 13.0, "learning_rate": 9.288286535072831e-06, "loss": 1.6672, "step": 7340 }, { "epoch": 0.2140966343776362, "grad_norm": 12.75, "learning_rate": 9.286347253400216e-06, "loss": 1.512, "step": 7360 }, { "epoch": 0.21467841871018414, "grad_norm": 13.1875, "learning_rate": 9.284407971727601e-06, "loss": 1.6741, "step": 7380 }, { "epoch": 0.21526020304273205, "grad_norm": 11.8125, "learning_rate": 9.282468690054986e-06, "loss": 1.7275, "step": 7400 }, { "epoch": 0.21584198737528, "grad_norm": 12.4375, "learning_rate": 9.280529408382371e-06, "loss": 1.6383, "step": 7420 }, { "epoch": 0.2164237717078279, "grad_norm": 11.125, "learning_rate": 9.278590126709756e-06, "loss": 1.6778, "step": 7440 }, { "epoch": 0.21700555604037583, "grad_norm": 12.875, "learning_rate": 9.276650845037141e-06, "loss": 1.6757, "step": 7460 }, { "epoch": 0.21758734037292377, "grad_norm": 15.6875, "learning_rate": 9.274711563364527e-06, "loss": 1.7122, "step": 7480 }, { "epoch": 0.21816912470547167, "grad_norm": 15.0625, "learning_rate": 9.272772281691912e-06, "loss": 1.7075, "step": 7500 }, { "epoch": 0.2187509090380196, "grad_norm": 12.25, "learning_rate": 9.270833000019297e-06, "loss": 1.7087, "step": 7520 }, { "epoch": 0.21933269337056754, "grad_norm": 12.375, "learning_rate": 9.268893718346682e-06, "loss": 1.6896, "step": 7540 }, { "epoch": 0.21991447770311545, "grad_norm": 13.75, "learning_rate": 9.266954436674067e-06, "loss": 1.7717, "step": 7560 }, { "epoch": 0.2204962620356634, "grad_norm": 12.25, "learning_rate": 9.265015155001452e-06, "loss": 1.5941, "step": 7580 }, { "epoch": 0.2210780463682113, "grad_norm": 12.75, "learning_rate": 9.263075873328837e-06, "loss": 1.6637, "step": 7600 }, { "epoch": 0.22165983070075923, "grad_norm": 13.6875, "learning_rate": 9.261136591656222e-06, "loss": 1.6377, "step": 7620 }, { "epoch": 0.22224161503330717, "grad_norm": 12.3125, "learning_rate": 9.259197309983607e-06, "loss": 1.6165, "step": 7640 }, { "epoch": 0.22282339936585507, "grad_norm": 11.875, "learning_rate": 9.257258028310992e-06, "loss": 1.6967, "step": 7660 }, { "epoch": 0.223405183698403, "grad_norm": 13.8125, "learning_rate": 9.255318746638378e-06, "loss": 1.6089, "step": 7680 }, { "epoch": 0.22398696803095092, "grad_norm": 12.75, "learning_rate": 9.253379464965763e-06, "loss": 1.6017, "step": 7700 }, { "epoch": 0.22456875236349885, "grad_norm": 15.4375, "learning_rate": 9.251440183293148e-06, "loss": 1.6056, "step": 7720 }, { "epoch": 0.2251505366960468, "grad_norm": 11.9375, "learning_rate": 9.249500901620533e-06, "loss": 1.614, "step": 7740 }, { "epoch": 0.2257323210285947, "grad_norm": 13.8125, "learning_rate": 9.247561619947918e-06, "loss": 1.6657, "step": 7760 }, { "epoch": 0.22631410536114263, "grad_norm": 12.5625, "learning_rate": 9.245622338275303e-06, "loss": 1.699, "step": 7780 }, { "epoch": 0.22689588969369054, "grad_norm": 15.5, "learning_rate": 9.243683056602688e-06, "loss": 1.6335, "step": 7800 }, { "epoch": 0.22747767402623847, "grad_norm": 14.6875, "learning_rate": 9.241743774930073e-06, "loss": 1.7117, "step": 7820 }, { "epoch": 0.2280594583587864, "grad_norm": 10.375, "learning_rate": 9.239804493257457e-06, "loss": 1.6218, "step": 7840 }, { "epoch": 0.22864124269133432, "grad_norm": 12.5, "learning_rate": 9.237865211584842e-06, "loss": 1.6866, "step": 7860 }, { "epoch": 0.22922302702388225, "grad_norm": 12.5, "learning_rate": 9.235925929912227e-06, "loss": 1.6505, "step": 7880 }, { "epoch": 0.22980481135643016, "grad_norm": 12.5625, "learning_rate": 9.233986648239612e-06, "loss": 1.6149, "step": 7900 }, { "epoch": 0.2303865956889781, "grad_norm": 11.3125, "learning_rate": 9.232047366566997e-06, "loss": 1.7142, "step": 7920 }, { "epoch": 0.23096838002152603, "grad_norm": 13.875, "learning_rate": 9.230108084894382e-06, "loss": 1.6732, "step": 7940 }, { "epoch": 0.23155016435407394, "grad_norm": 12.25, "learning_rate": 9.228168803221767e-06, "loss": 1.797, "step": 7960 }, { "epoch": 0.23213194868662188, "grad_norm": 11.8125, "learning_rate": 9.226229521549152e-06, "loss": 1.779, "step": 7980 }, { "epoch": 0.23271373301916978, "grad_norm": 14.875, "learning_rate": 9.224290239876537e-06, "loss": 1.6473, "step": 8000 }, { "epoch": 0.23329551735171772, "grad_norm": 10.625, "learning_rate": 9.222350958203923e-06, "loss": 1.6575, "step": 8020 }, { "epoch": 0.23387730168426565, "grad_norm": 12.8125, "learning_rate": 9.220411676531308e-06, "loss": 1.6458, "step": 8040 }, { "epoch": 0.23445908601681356, "grad_norm": 13.5, "learning_rate": 9.218472394858693e-06, "loss": 1.6488, "step": 8060 }, { "epoch": 0.2350408703493615, "grad_norm": 11.5625, "learning_rate": 9.216533113186078e-06, "loss": 1.681, "step": 8080 }, { "epoch": 0.2356226546819094, "grad_norm": 12.125, "learning_rate": 9.214593831513463e-06, "loss": 1.6335, "step": 8100 }, { "epoch": 0.23620443901445734, "grad_norm": 8.625, "learning_rate": 9.212654549840848e-06, "loss": 1.6555, "step": 8120 }, { "epoch": 0.23678622334700528, "grad_norm": 11.5625, "learning_rate": 9.210715268168233e-06, "loss": 1.6875, "step": 8140 }, { "epoch": 0.23736800767955318, "grad_norm": 13.125, "learning_rate": 9.208775986495618e-06, "loss": 1.6405, "step": 8160 }, { "epoch": 0.23794979201210112, "grad_norm": 14.1875, "learning_rate": 9.206836704823003e-06, "loss": 1.629, "step": 8180 }, { "epoch": 0.23853157634464903, "grad_norm": 13.0625, "learning_rate": 9.204897423150388e-06, "loss": 1.6058, "step": 8200 }, { "epoch": 0.23911336067719696, "grad_norm": 15.6875, "learning_rate": 9.202958141477773e-06, "loss": 1.6622, "step": 8220 }, { "epoch": 0.2396951450097449, "grad_norm": 13.5, "learning_rate": 9.201018859805159e-06, "loss": 1.6367, "step": 8240 }, { "epoch": 0.2402769293422928, "grad_norm": 15.375, "learning_rate": 9.199079578132544e-06, "loss": 1.6016, "step": 8260 }, { "epoch": 0.24085871367484074, "grad_norm": 13.8125, "learning_rate": 9.197140296459929e-06, "loss": 1.6196, "step": 8280 }, { "epoch": 0.24144049800738865, "grad_norm": 13.0625, "learning_rate": 9.195201014787314e-06, "loss": 1.6289, "step": 8300 }, { "epoch": 0.24202228233993658, "grad_norm": 12.0, "learning_rate": 9.193261733114699e-06, "loss": 1.6877, "step": 8320 }, { "epoch": 0.24260406667248452, "grad_norm": 13.25, "learning_rate": 9.191322451442084e-06, "loss": 1.7558, "step": 8340 }, { "epoch": 0.24318585100503243, "grad_norm": 12.0, "learning_rate": 9.18938316976947e-06, "loss": 1.7199, "step": 8360 }, { "epoch": 0.24376763533758036, "grad_norm": 12.25, "learning_rate": 9.187443888096854e-06, "loss": 1.6592, "step": 8380 }, { "epoch": 0.24434941967012827, "grad_norm": 10.25, "learning_rate": 9.18550460642424e-06, "loss": 1.6278, "step": 8400 }, { "epoch": 0.2449312040026762, "grad_norm": 13.8125, "learning_rate": 9.183565324751624e-06, "loss": 1.6542, "step": 8420 }, { "epoch": 0.24551298833522414, "grad_norm": 13.8125, "learning_rate": 9.18162604307901e-06, "loss": 1.6557, "step": 8440 }, { "epoch": 0.24609477266777205, "grad_norm": 12.625, "learning_rate": 9.179686761406395e-06, "loss": 1.7821, "step": 8460 }, { "epoch": 0.24667655700031998, "grad_norm": 13.5625, "learning_rate": 9.17774747973378e-06, "loss": 1.5931, "step": 8480 }, { "epoch": 0.2472583413328679, "grad_norm": 12.0, "learning_rate": 9.175808198061165e-06, "loss": 1.6858, "step": 8500 }, { "epoch": 0.24784012566541583, "grad_norm": 12.375, "learning_rate": 9.17386891638855e-06, "loss": 1.667, "step": 8520 }, { "epoch": 0.24842190999796376, "grad_norm": 12.875, "learning_rate": 9.171929634715935e-06, "loss": 1.6255, "step": 8540 }, { "epoch": 0.24900369433051167, "grad_norm": 16.375, "learning_rate": 9.16999035304332e-06, "loss": 1.62, "step": 8560 }, { "epoch": 0.2495854786630596, "grad_norm": 13.3125, "learning_rate": 9.168051071370705e-06, "loss": 1.6483, "step": 8580 }, { "epoch": 0.25016726299560754, "grad_norm": 14.0, "learning_rate": 9.16611178969809e-06, "loss": 1.7872, "step": 8600 }, { "epoch": 0.25074904732815545, "grad_norm": 14.0, "learning_rate": 9.164172508025475e-06, "loss": 1.6429, "step": 8620 }, { "epoch": 0.25133083166070336, "grad_norm": 13.0, "learning_rate": 9.16223322635286e-06, "loss": 1.6614, "step": 8640 }, { "epoch": 0.2519126159932513, "grad_norm": 12.25, "learning_rate": 9.160293944680246e-06, "loss": 1.7051, "step": 8660 }, { "epoch": 0.25249440032579923, "grad_norm": 12.1875, "learning_rate": 9.15835466300763e-06, "loss": 1.6515, "step": 8680 }, { "epoch": 0.25307618465834714, "grad_norm": 11.25, "learning_rate": 9.156415381335016e-06, "loss": 1.6462, "step": 8700 }, { "epoch": 0.2536579689908951, "grad_norm": 12.0, "learning_rate": 9.154476099662401e-06, "loss": 1.6229, "step": 8720 }, { "epoch": 0.254239753323443, "grad_norm": 13.625, "learning_rate": 9.152536817989786e-06, "loss": 1.6951, "step": 8740 }, { "epoch": 0.2548215376559909, "grad_norm": 11.3125, "learning_rate": 9.150597536317171e-06, "loss": 1.6354, "step": 8760 }, { "epoch": 0.2554033219885388, "grad_norm": 12.9375, "learning_rate": 9.148658254644556e-06, "loss": 1.6501, "step": 8780 }, { "epoch": 0.2559851063210868, "grad_norm": 10.5, "learning_rate": 9.146718972971941e-06, "loss": 1.7109, "step": 8800 }, { "epoch": 0.2565668906536347, "grad_norm": 12.6875, "learning_rate": 9.144779691299326e-06, "loss": 1.6589, "step": 8820 }, { "epoch": 0.2571486749861826, "grad_norm": 13.625, "learning_rate": 9.142840409626712e-06, "loss": 1.7272, "step": 8840 }, { "epoch": 0.25773045931873056, "grad_norm": 14.6875, "learning_rate": 9.140901127954097e-06, "loss": 1.6892, "step": 8860 }, { "epoch": 0.2583122436512785, "grad_norm": 13.1875, "learning_rate": 9.138961846281482e-06, "loss": 1.6675, "step": 8880 }, { "epoch": 0.2588940279838264, "grad_norm": 11.25, "learning_rate": 9.137022564608867e-06, "loss": 1.5748, "step": 8900 }, { "epoch": 0.25947581231637434, "grad_norm": 13.6875, "learning_rate": 9.135083282936252e-06, "loss": 1.6906, "step": 8920 }, { "epoch": 0.26005759664892225, "grad_norm": 13.4375, "learning_rate": 9.133144001263637e-06, "loss": 1.7507, "step": 8940 }, { "epoch": 0.26063938098147016, "grad_norm": 13.25, "learning_rate": 9.131204719591022e-06, "loss": 1.7374, "step": 8960 }, { "epoch": 0.26122116531401807, "grad_norm": 13.125, "learning_rate": 9.129265437918407e-06, "loss": 1.6141, "step": 8980 }, { "epoch": 0.26180294964656603, "grad_norm": 11.375, "learning_rate": 9.127326156245792e-06, "loss": 1.6063, "step": 9000 }, { "epoch": 0.26238473397911394, "grad_norm": 13.375, "learning_rate": 9.125386874573177e-06, "loss": 1.7222, "step": 9020 }, { "epoch": 0.26296651831166185, "grad_norm": 14.0625, "learning_rate": 9.123447592900562e-06, "loss": 1.7228, "step": 9040 }, { "epoch": 0.2635483026442098, "grad_norm": 12.5, "learning_rate": 9.121508311227948e-06, "loss": 1.6867, "step": 9060 }, { "epoch": 0.2641300869767577, "grad_norm": 11.25, "learning_rate": 9.119569029555333e-06, "loss": 1.7106, "step": 9080 }, { "epoch": 0.2647118713093056, "grad_norm": 13.0625, "learning_rate": 9.117629747882716e-06, "loss": 1.6625, "step": 9100 }, { "epoch": 0.2652936556418536, "grad_norm": 12.625, "learning_rate": 9.115690466210101e-06, "loss": 1.6357, "step": 9120 }, { "epoch": 0.2658754399744015, "grad_norm": 14.3125, "learning_rate": 9.113751184537486e-06, "loss": 1.6298, "step": 9140 }, { "epoch": 0.2664572243069494, "grad_norm": 12.0, "learning_rate": 9.111811902864871e-06, "loss": 1.7246, "step": 9160 }, { "epoch": 0.2670390086394973, "grad_norm": 10.625, "learning_rate": 9.109872621192256e-06, "loss": 1.658, "step": 9180 }, { "epoch": 0.2676207929720453, "grad_norm": 12.875, "learning_rate": 9.107933339519642e-06, "loss": 1.6617, "step": 9200 }, { "epoch": 0.2682025773045932, "grad_norm": 12.125, "learning_rate": 9.105994057847027e-06, "loss": 1.7339, "step": 9220 }, { "epoch": 0.2687843616371411, "grad_norm": 15.3125, "learning_rate": 9.104054776174412e-06, "loss": 1.7189, "step": 9240 }, { "epoch": 0.26936614596968905, "grad_norm": 12.6875, "learning_rate": 9.102115494501797e-06, "loss": 1.6779, "step": 9260 }, { "epoch": 0.26994793030223696, "grad_norm": 10.75, "learning_rate": 9.100176212829182e-06, "loss": 1.6748, "step": 9280 }, { "epoch": 0.27052971463478487, "grad_norm": 13.875, "learning_rate": 9.098236931156567e-06, "loss": 1.6947, "step": 9300 }, { "epoch": 0.27111149896733283, "grad_norm": 13.8125, "learning_rate": 9.096297649483952e-06, "loss": 1.6742, "step": 9320 }, { "epoch": 0.27169328329988074, "grad_norm": 12.625, "learning_rate": 9.094358367811337e-06, "loss": 1.7339, "step": 9340 }, { "epoch": 0.27227506763242865, "grad_norm": 14.1875, "learning_rate": 9.092419086138722e-06, "loss": 1.6263, "step": 9360 }, { "epoch": 0.2728568519649766, "grad_norm": 11.75, "learning_rate": 9.090479804466107e-06, "loss": 1.6642, "step": 9380 }, { "epoch": 0.2734386362975245, "grad_norm": 11.6875, "learning_rate": 9.088540522793493e-06, "loss": 1.684, "step": 9400 }, { "epoch": 0.2740204206300724, "grad_norm": 13.125, "learning_rate": 9.086601241120878e-06, "loss": 1.6405, "step": 9420 }, { "epoch": 0.27460220496262033, "grad_norm": 19.375, "learning_rate": 9.084661959448263e-06, "loss": 1.712, "step": 9440 }, { "epoch": 0.2751839892951683, "grad_norm": 9.6875, "learning_rate": 9.082722677775648e-06, "loss": 1.7061, "step": 9460 }, { "epoch": 0.2757657736277162, "grad_norm": 11.875, "learning_rate": 9.080783396103033e-06, "loss": 1.7192, "step": 9480 }, { "epoch": 0.2763475579602641, "grad_norm": 11.875, "learning_rate": 9.078844114430418e-06, "loss": 1.6492, "step": 9500 }, { "epoch": 0.2769293422928121, "grad_norm": 11.6875, "learning_rate": 9.076904832757803e-06, "loss": 1.714, "step": 9520 }, { "epoch": 0.27751112662536, "grad_norm": 12.5, "learning_rate": 9.074965551085188e-06, "loss": 1.6707, "step": 9540 }, { "epoch": 0.2780929109579079, "grad_norm": 12.8125, "learning_rate": 9.073026269412573e-06, "loss": 1.748, "step": 9560 }, { "epoch": 0.27867469529045585, "grad_norm": 13.6875, "learning_rate": 9.071086987739958e-06, "loss": 1.6314, "step": 9580 }, { "epoch": 0.27925647962300376, "grad_norm": 14.6875, "learning_rate": 9.069147706067344e-06, "loss": 1.7624, "step": 9600 }, { "epoch": 0.27983826395555167, "grad_norm": 14.125, "learning_rate": 9.067208424394729e-06, "loss": 1.5784, "step": 9620 }, { "epoch": 0.2804200482880996, "grad_norm": 13.75, "learning_rate": 9.065269142722114e-06, "loss": 1.6444, "step": 9640 }, { "epoch": 0.28100183262064754, "grad_norm": 13.75, "learning_rate": 9.063329861049499e-06, "loss": 1.6215, "step": 9660 }, { "epoch": 0.28158361695319545, "grad_norm": 12.3125, "learning_rate": 9.061390579376884e-06, "loss": 1.7362, "step": 9680 }, { "epoch": 0.28216540128574336, "grad_norm": 11.125, "learning_rate": 9.059451297704269e-06, "loss": 1.6616, "step": 9700 }, { "epoch": 0.2827471856182913, "grad_norm": 15.5, "learning_rate": 9.057512016031654e-06, "loss": 1.5841, "step": 9720 }, { "epoch": 0.2833289699508392, "grad_norm": 13.0625, "learning_rate": 9.05557273435904e-06, "loss": 1.5656, "step": 9740 }, { "epoch": 0.28391075428338713, "grad_norm": 14.375, "learning_rate": 9.053633452686424e-06, "loss": 1.6562, "step": 9760 }, { "epoch": 0.2844925386159351, "grad_norm": 11.5625, "learning_rate": 9.05169417101381e-06, "loss": 1.7046, "step": 9780 }, { "epoch": 0.285074322948483, "grad_norm": 9.875, "learning_rate": 9.049754889341195e-06, "loss": 1.6812, "step": 9800 }, { "epoch": 0.2856561072810309, "grad_norm": 12.6875, "learning_rate": 9.04781560766858e-06, "loss": 1.6193, "step": 9820 }, { "epoch": 0.2862378916135788, "grad_norm": 10.875, "learning_rate": 9.045876325995965e-06, "loss": 1.5845, "step": 9840 }, { "epoch": 0.2868196759461268, "grad_norm": 10.625, "learning_rate": 9.04393704432335e-06, "loss": 1.7287, "step": 9860 }, { "epoch": 0.2874014602786747, "grad_norm": 12.9375, "learning_rate": 9.041997762650735e-06, "loss": 1.6522, "step": 9880 }, { "epoch": 0.2879832446112226, "grad_norm": 13.25, "learning_rate": 9.04005848097812e-06, "loss": 1.638, "step": 9900 }, { "epoch": 0.28856502894377056, "grad_norm": 13.4375, "learning_rate": 9.038119199305505e-06, "loss": 1.726, "step": 9920 }, { "epoch": 0.28914681327631847, "grad_norm": 14.4375, "learning_rate": 9.03617991763289e-06, "loss": 1.7284, "step": 9940 }, { "epoch": 0.2897285976088664, "grad_norm": 14.3125, "learning_rate": 9.034240635960275e-06, "loss": 1.6854, "step": 9960 }, { "epoch": 0.29031038194141434, "grad_norm": 15.125, "learning_rate": 9.03230135428766e-06, "loss": 1.7404, "step": 9980 }, { "epoch": 0.29089216627396225, "grad_norm": 12.125, "learning_rate": 9.030362072615045e-06, "loss": 1.6527, "step": 10000 }, { "epoch": 0.29147395060651016, "grad_norm": 13.0625, "learning_rate": 9.02842279094243e-06, "loss": 1.708, "step": 10020 }, { "epoch": 0.29205573493905806, "grad_norm": 16.125, "learning_rate": 9.026483509269816e-06, "loss": 1.6867, "step": 10040 }, { "epoch": 0.29263751927160603, "grad_norm": 13.5, "learning_rate": 9.0245442275972e-06, "loss": 1.6858, "step": 10060 }, { "epoch": 0.29321930360415394, "grad_norm": 16.375, "learning_rate": 9.022604945924586e-06, "loss": 1.6886, "step": 10080 }, { "epoch": 0.29380108793670184, "grad_norm": 13.3125, "learning_rate": 9.020665664251971e-06, "loss": 1.6543, "step": 10100 }, { "epoch": 0.2943828722692498, "grad_norm": 13.25, "learning_rate": 9.018726382579356e-06, "loss": 1.6289, "step": 10120 }, { "epoch": 0.2949646566017977, "grad_norm": 10.75, "learning_rate": 9.016787100906741e-06, "loss": 1.6614, "step": 10140 }, { "epoch": 0.2955464409343456, "grad_norm": 13.3125, "learning_rate": 9.014847819234126e-06, "loss": 1.6518, "step": 10160 }, { "epoch": 0.2961282252668936, "grad_norm": 12.75, "learning_rate": 9.012908537561511e-06, "loss": 1.6559, "step": 10180 }, { "epoch": 0.2967100095994415, "grad_norm": 14.75, "learning_rate": 9.010969255888896e-06, "loss": 1.6365, "step": 10200 }, { "epoch": 0.2972917939319894, "grad_norm": 12.5, "learning_rate": 9.009029974216282e-06, "loss": 1.7192, "step": 10220 }, { "epoch": 0.2978735782645373, "grad_norm": 13.25, "learning_rate": 9.007090692543667e-06, "loss": 1.6718, "step": 10240 }, { "epoch": 0.29845536259708527, "grad_norm": 10.5625, "learning_rate": 9.005151410871052e-06, "loss": 1.6284, "step": 10260 }, { "epoch": 0.2990371469296332, "grad_norm": 13.3125, "learning_rate": 9.003212129198437e-06, "loss": 1.6932, "step": 10280 }, { "epoch": 0.2996189312621811, "grad_norm": 11.0625, "learning_rate": 9.001272847525822e-06, "loss": 1.6853, "step": 10300 }, { "epoch": 0.30020071559472905, "grad_norm": 12.25, "learning_rate": 8.999333565853207e-06, "loss": 1.6984, "step": 10320 }, { "epoch": 0.30078249992727696, "grad_norm": 10.75, "learning_rate": 8.997394284180592e-06, "loss": 1.6524, "step": 10340 }, { "epoch": 0.30136428425982487, "grad_norm": 12.75, "learning_rate": 8.995455002507977e-06, "loss": 1.6859, "step": 10360 }, { "epoch": 0.30194606859237283, "grad_norm": 13.5, "learning_rate": 8.993515720835362e-06, "loss": 1.5609, "step": 10380 }, { "epoch": 0.30252785292492074, "grad_norm": 11.8125, "learning_rate": 8.991576439162747e-06, "loss": 1.6768, "step": 10400 }, { "epoch": 0.30310963725746864, "grad_norm": 12.5625, "learning_rate": 8.989637157490133e-06, "loss": 1.6377, "step": 10420 }, { "epoch": 0.3036914215900166, "grad_norm": 11.0625, "learning_rate": 8.987697875817518e-06, "loss": 1.7098, "step": 10440 }, { "epoch": 0.3042732059225645, "grad_norm": 11.6875, "learning_rate": 8.985758594144903e-06, "loss": 1.6467, "step": 10460 }, { "epoch": 0.3048549902551124, "grad_norm": 15.5, "learning_rate": 8.983819312472288e-06, "loss": 1.6654, "step": 10480 }, { "epoch": 0.30543677458766033, "grad_norm": 12.625, "learning_rate": 8.981880030799673e-06, "loss": 1.7498, "step": 10500 }, { "epoch": 0.3060185589202083, "grad_norm": 15.75, "learning_rate": 8.979940749127058e-06, "loss": 1.6435, "step": 10520 }, { "epoch": 0.3066003432527562, "grad_norm": 12.0625, "learning_rate": 8.978001467454443e-06, "loss": 1.6964, "step": 10540 }, { "epoch": 0.3071821275853041, "grad_norm": 13.875, "learning_rate": 8.976062185781828e-06, "loss": 1.6594, "step": 10560 }, { "epoch": 0.3077639119178521, "grad_norm": 13.1875, "learning_rate": 8.974122904109213e-06, "loss": 1.6804, "step": 10580 }, { "epoch": 0.3083456962504, "grad_norm": 10.25, "learning_rate": 8.972183622436598e-06, "loss": 1.735, "step": 10600 }, { "epoch": 0.3089274805829479, "grad_norm": 13.5625, "learning_rate": 8.970244340763983e-06, "loss": 1.6542, "step": 10620 }, { "epoch": 0.30950926491549585, "grad_norm": 9.75, "learning_rate": 8.968305059091369e-06, "loss": 1.5944, "step": 10640 }, { "epoch": 0.31009104924804376, "grad_norm": 14.125, "learning_rate": 8.966365777418754e-06, "loss": 1.653, "step": 10660 }, { "epoch": 0.31067283358059167, "grad_norm": 13.125, "learning_rate": 8.964426495746139e-06, "loss": 1.6451, "step": 10680 }, { "epoch": 0.3112546179131396, "grad_norm": 16.125, "learning_rate": 8.962487214073524e-06, "loss": 1.7036, "step": 10700 }, { "epoch": 0.31183640224568754, "grad_norm": 11.875, "learning_rate": 8.960547932400909e-06, "loss": 1.6025, "step": 10720 }, { "epoch": 0.31241818657823545, "grad_norm": 13.5, "learning_rate": 8.958608650728292e-06, "loss": 1.6561, "step": 10740 }, { "epoch": 0.31299997091078335, "grad_norm": 11.9375, "learning_rate": 8.956669369055677e-06, "loss": 1.5292, "step": 10760 }, { "epoch": 0.3135817552433313, "grad_norm": 10.5625, "learning_rate": 8.954730087383063e-06, "loss": 1.6855, "step": 10780 }, { "epoch": 0.3141635395758792, "grad_norm": 11.4375, "learning_rate": 8.952790805710448e-06, "loss": 1.6713, "step": 10800 }, { "epoch": 0.31474532390842713, "grad_norm": 16.625, "learning_rate": 8.950851524037833e-06, "loss": 1.7154, "step": 10820 }, { "epoch": 0.3153271082409751, "grad_norm": 13.875, "learning_rate": 8.948912242365218e-06, "loss": 1.6844, "step": 10840 }, { "epoch": 0.315908892573523, "grad_norm": 12.1875, "learning_rate": 8.946972960692603e-06, "loss": 1.6695, "step": 10860 }, { "epoch": 0.3164906769060709, "grad_norm": 12.1875, "learning_rate": 8.945033679019988e-06, "loss": 1.7592, "step": 10880 }, { "epoch": 0.3170724612386188, "grad_norm": 14.5625, "learning_rate": 8.943094397347373e-06, "loss": 1.5866, "step": 10900 }, { "epoch": 0.3176542455711668, "grad_norm": 14.0, "learning_rate": 8.941155115674758e-06, "loss": 1.7119, "step": 10920 }, { "epoch": 0.3182360299037147, "grad_norm": 12.375, "learning_rate": 8.939215834002143e-06, "loss": 1.6872, "step": 10940 }, { "epoch": 0.3188178142362626, "grad_norm": 13.875, "learning_rate": 8.937276552329528e-06, "loss": 1.736, "step": 10960 }, { "epoch": 0.31939959856881056, "grad_norm": 12.9375, "learning_rate": 8.935337270656914e-06, "loss": 1.7081, "step": 10980 }, { "epoch": 0.31998138290135847, "grad_norm": 12.6875, "learning_rate": 8.933397988984299e-06, "loss": 1.7345, "step": 11000 }, { "epoch": 0.3205631672339064, "grad_norm": 12.4375, "learning_rate": 8.931458707311684e-06, "loss": 1.7188, "step": 11020 }, { "epoch": 0.32114495156645434, "grad_norm": 12.0, "learning_rate": 8.929519425639069e-06, "loss": 1.6173, "step": 11040 }, { "epoch": 0.32172673589900225, "grad_norm": 15.75, "learning_rate": 8.927580143966454e-06, "loss": 1.6647, "step": 11060 }, { "epoch": 0.32230852023155016, "grad_norm": 11.6875, "learning_rate": 8.925640862293839e-06, "loss": 1.6596, "step": 11080 }, { "epoch": 0.32289030456409806, "grad_norm": 10.875, "learning_rate": 8.923701580621224e-06, "loss": 1.6738, "step": 11100 }, { "epoch": 0.323472088896646, "grad_norm": 12.6875, "learning_rate": 8.92176229894861e-06, "loss": 1.7109, "step": 11120 }, { "epoch": 0.32405387322919393, "grad_norm": 13.625, "learning_rate": 8.919823017275994e-06, "loss": 1.6099, "step": 11140 }, { "epoch": 0.32463565756174184, "grad_norm": 13.125, "learning_rate": 8.91788373560338e-06, "loss": 1.5996, "step": 11160 }, { "epoch": 0.3252174418942898, "grad_norm": 11.875, "learning_rate": 8.915944453930765e-06, "loss": 1.6787, "step": 11180 }, { "epoch": 0.3257992262268377, "grad_norm": 11.9375, "learning_rate": 8.91400517225815e-06, "loss": 1.5997, "step": 11200 }, { "epoch": 0.3263810105593856, "grad_norm": 12.5, "learning_rate": 8.912065890585535e-06, "loss": 1.7873, "step": 11220 }, { "epoch": 0.3269627948919336, "grad_norm": 11.5, "learning_rate": 8.91012660891292e-06, "loss": 1.7129, "step": 11240 }, { "epoch": 0.3275445792244815, "grad_norm": 15.4375, "learning_rate": 8.908187327240305e-06, "loss": 1.6616, "step": 11260 }, { "epoch": 0.3281263635570294, "grad_norm": 13.0625, "learning_rate": 8.90624804556769e-06, "loss": 1.6324, "step": 11280 }, { "epoch": 0.3287081478895773, "grad_norm": 11.875, "learning_rate": 8.904308763895075e-06, "loss": 1.62, "step": 11300 }, { "epoch": 0.32928993222212527, "grad_norm": 21.75, "learning_rate": 8.90236948222246e-06, "loss": 1.7055, "step": 11320 }, { "epoch": 0.3298717165546732, "grad_norm": 11.75, "learning_rate": 8.900430200549845e-06, "loss": 1.6479, "step": 11340 }, { "epoch": 0.3304535008872211, "grad_norm": 11.875, "learning_rate": 8.89849091887723e-06, "loss": 1.7365, "step": 11360 }, { "epoch": 0.33103528521976905, "grad_norm": 12.375, "learning_rate": 8.896551637204616e-06, "loss": 1.7232, "step": 11380 }, { "epoch": 0.33161706955231696, "grad_norm": 9.5, "learning_rate": 8.894612355532e-06, "loss": 1.6588, "step": 11400 }, { "epoch": 0.33219885388486486, "grad_norm": 13.0625, "learning_rate": 8.892673073859386e-06, "loss": 1.6273, "step": 11420 }, { "epoch": 0.3327806382174128, "grad_norm": 12.0, "learning_rate": 8.89073379218677e-06, "loss": 1.6455, "step": 11440 }, { "epoch": 0.33336242254996074, "grad_norm": 10.6875, "learning_rate": 8.888794510514156e-06, "loss": 1.8009, "step": 11460 }, { "epoch": 0.33394420688250864, "grad_norm": 12.125, "learning_rate": 8.886855228841541e-06, "loss": 1.7571, "step": 11480 }, { "epoch": 0.3345259912150566, "grad_norm": 11.0, "learning_rate": 8.884915947168926e-06, "loss": 1.7168, "step": 11500 }, { "epoch": 0.3351077755476045, "grad_norm": 16.5, "learning_rate": 8.882976665496311e-06, "loss": 1.607, "step": 11520 }, { "epoch": 0.3356895598801524, "grad_norm": 12.75, "learning_rate": 8.881037383823696e-06, "loss": 1.65, "step": 11540 }, { "epoch": 0.33627134421270033, "grad_norm": 13.75, "learning_rate": 8.879098102151081e-06, "loss": 1.5926, "step": 11560 }, { "epoch": 0.3368531285452483, "grad_norm": 13.625, "learning_rate": 8.877158820478466e-06, "loss": 1.6459, "step": 11580 }, { "epoch": 0.3374349128777962, "grad_norm": 12.4375, "learning_rate": 8.875219538805852e-06, "loss": 1.5444, "step": 11600 }, { "epoch": 0.3380166972103441, "grad_norm": 11.75, "learning_rate": 8.873280257133237e-06, "loss": 1.6608, "step": 11620 }, { "epoch": 0.33859848154289207, "grad_norm": 13.5625, "learning_rate": 8.871340975460622e-06, "loss": 1.642, "step": 11640 }, { "epoch": 0.33918026587544, "grad_norm": 13.1875, "learning_rate": 8.869401693788007e-06, "loss": 1.7346, "step": 11660 }, { "epoch": 0.3397620502079879, "grad_norm": 12.6875, "learning_rate": 8.867462412115392e-06, "loss": 1.5846, "step": 11680 }, { "epoch": 0.34034383454053585, "grad_norm": 13.1875, "learning_rate": 8.865523130442777e-06, "loss": 1.5694, "step": 11700 }, { "epoch": 0.34092561887308376, "grad_norm": 13.875, "learning_rate": 8.863583848770162e-06, "loss": 1.5998, "step": 11720 }, { "epoch": 0.34150740320563167, "grad_norm": 13.25, "learning_rate": 8.861644567097547e-06, "loss": 1.7103, "step": 11740 }, { "epoch": 0.3420891875381796, "grad_norm": 12.125, "learning_rate": 8.859705285424932e-06, "loss": 1.616, "step": 11760 }, { "epoch": 0.34267097187072754, "grad_norm": 12.3125, "learning_rate": 8.857766003752317e-06, "loss": 1.7097, "step": 11780 }, { "epoch": 0.34325275620327544, "grad_norm": 15.3125, "learning_rate": 8.855826722079703e-06, "loss": 1.733, "step": 11800 }, { "epoch": 0.34383454053582335, "grad_norm": 15.25, "learning_rate": 8.853887440407088e-06, "loss": 1.5843, "step": 11820 }, { "epoch": 0.3444163248683713, "grad_norm": 11.9375, "learning_rate": 8.851948158734473e-06, "loss": 1.685, "step": 11840 }, { "epoch": 0.3449981092009192, "grad_norm": 11.9375, "learning_rate": 8.850008877061858e-06, "loss": 1.5462, "step": 11860 }, { "epoch": 0.34557989353346713, "grad_norm": 12.5, "learning_rate": 8.848069595389243e-06, "loss": 1.7058, "step": 11880 }, { "epoch": 0.3461616778660151, "grad_norm": 10.8125, "learning_rate": 8.846130313716628e-06, "loss": 1.6387, "step": 11900 }, { "epoch": 0.346743462198563, "grad_norm": 12.8125, "learning_rate": 8.844191032044013e-06, "loss": 1.6074, "step": 11920 }, { "epoch": 0.3473252465311109, "grad_norm": 13.3125, "learning_rate": 8.842251750371398e-06, "loss": 1.6532, "step": 11940 }, { "epoch": 0.3479070308636588, "grad_norm": 11.5625, "learning_rate": 8.840312468698783e-06, "loss": 1.6338, "step": 11960 }, { "epoch": 0.3484888151962068, "grad_norm": 14.3125, "learning_rate": 8.838373187026168e-06, "loss": 1.7099, "step": 11980 }, { "epoch": 0.3490705995287547, "grad_norm": 12.1875, "learning_rate": 8.836433905353554e-06, "loss": 1.6162, "step": 12000 }, { "epoch": 0.3496523838613026, "grad_norm": 10.75, "learning_rate": 8.834494623680939e-06, "loss": 1.6681, "step": 12020 }, { "epoch": 0.35023416819385056, "grad_norm": 9.8125, "learning_rate": 8.832555342008324e-06, "loss": 1.6138, "step": 12040 }, { "epoch": 0.35081595252639847, "grad_norm": 14.9375, "learning_rate": 8.830616060335709e-06, "loss": 1.5801, "step": 12060 }, { "epoch": 0.3513977368589464, "grad_norm": 13.6875, "learning_rate": 8.828676778663092e-06, "loss": 1.6548, "step": 12080 }, { "epoch": 0.35197952119149434, "grad_norm": 12.3125, "learning_rate": 8.826737496990477e-06, "loss": 1.6574, "step": 12100 }, { "epoch": 0.35256130552404225, "grad_norm": 12.125, "learning_rate": 8.824798215317862e-06, "loss": 1.6967, "step": 12120 }, { "epoch": 0.35314308985659015, "grad_norm": 16.125, "learning_rate": 8.822858933645248e-06, "loss": 1.6683, "step": 12140 }, { "epoch": 0.35372487418913806, "grad_norm": 12.0625, "learning_rate": 8.820919651972633e-06, "loss": 1.7342, "step": 12160 }, { "epoch": 0.354306658521686, "grad_norm": 13.0625, "learning_rate": 8.818980370300018e-06, "loss": 1.7129, "step": 12180 }, { "epoch": 0.35488844285423393, "grad_norm": 13.0, "learning_rate": 8.817041088627403e-06, "loss": 1.6718, "step": 12200 }, { "epoch": 0.35547022718678184, "grad_norm": 15.125, "learning_rate": 8.815101806954788e-06, "loss": 1.6067, "step": 12220 }, { "epoch": 0.3560520115193298, "grad_norm": 10.9375, "learning_rate": 8.813162525282173e-06, "loss": 1.6761, "step": 12240 }, { "epoch": 0.3566337958518777, "grad_norm": 11.25, "learning_rate": 8.811223243609558e-06, "loss": 1.7204, "step": 12260 }, { "epoch": 0.3572155801844256, "grad_norm": 14.4375, "learning_rate": 8.809283961936943e-06, "loss": 1.6675, "step": 12280 }, { "epoch": 0.3577973645169736, "grad_norm": 12.0, "learning_rate": 8.807344680264328e-06, "loss": 1.6827, "step": 12300 }, { "epoch": 0.3583791488495215, "grad_norm": 12.625, "learning_rate": 8.805405398591713e-06, "loss": 1.7405, "step": 12320 }, { "epoch": 0.3589609331820694, "grad_norm": 14.5625, "learning_rate": 8.803466116919098e-06, "loss": 1.6716, "step": 12340 }, { "epoch": 0.3595427175146173, "grad_norm": 12.4375, "learning_rate": 8.801526835246484e-06, "loss": 1.6382, "step": 12360 }, { "epoch": 0.36012450184716527, "grad_norm": 11.375, "learning_rate": 8.799587553573869e-06, "loss": 1.7131, "step": 12380 }, { "epoch": 0.3607062861797132, "grad_norm": 12.875, "learning_rate": 8.797648271901254e-06, "loss": 1.607, "step": 12400 }, { "epoch": 0.3612880705122611, "grad_norm": 12.0625, "learning_rate": 8.795708990228639e-06, "loss": 1.7249, "step": 12420 }, { "epoch": 0.36186985484480905, "grad_norm": 10.0625, "learning_rate": 8.793769708556024e-06, "loss": 1.7111, "step": 12440 }, { "epoch": 0.36245163917735695, "grad_norm": 13.375, "learning_rate": 8.791830426883409e-06, "loss": 1.6574, "step": 12460 }, { "epoch": 0.36303342350990486, "grad_norm": 11.5625, "learning_rate": 8.789891145210794e-06, "loss": 1.7416, "step": 12480 }, { "epoch": 0.3636152078424528, "grad_norm": 11.0, "learning_rate": 8.78795186353818e-06, "loss": 1.6661, "step": 12500 }, { "epoch": 0.36419699217500073, "grad_norm": 11.625, "learning_rate": 8.786012581865564e-06, "loss": 1.7245, "step": 12520 }, { "epoch": 0.36477877650754864, "grad_norm": 15.25, "learning_rate": 8.78407330019295e-06, "loss": 1.6578, "step": 12540 }, { "epoch": 0.36536056084009655, "grad_norm": 15.1875, "learning_rate": 8.782134018520335e-06, "loss": 1.708, "step": 12560 }, { "epoch": 0.3659423451726445, "grad_norm": 12.375, "learning_rate": 8.78019473684772e-06, "loss": 1.7078, "step": 12580 }, { "epoch": 0.3665241295051924, "grad_norm": 12.1875, "learning_rate": 8.778255455175105e-06, "loss": 1.6425, "step": 12600 }, { "epoch": 0.3671059138377403, "grad_norm": 11.5, "learning_rate": 8.77631617350249e-06, "loss": 1.8068, "step": 12620 }, { "epoch": 0.3676876981702883, "grad_norm": 12.4375, "learning_rate": 8.774376891829875e-06, "loss": 1.6618, "step": 12640 }, { "epoch": 0.3682694825028362, "grad_norm": 12.25, "learning_rate": 8.77243761015726e-06, "loss": 1.5911, "step": 12660 }, { "epoch": 0.3688512668353841, "grad_norm": 13.75, "learning_rate": 8.770498328484645e-06, "loss": 1.5994, "step": 12680 }, { "epoch": 0.36943305116793207, "grad_norm": 12.75, "learning_rate": 8.76855904681203e-06, "loss": 1.6917, "step": 12700 }, { "epoch": 0.37001483550048, "grad_norm": 10.0, "learning_rate": 8.766619765139415e-06, "loss": 1.6336, "step": 12720 }, { "epoch": 0.3705966198330279, "grad_norm": 13.625, "learning_rate": 8.7646804834668e-06, "loss": 1.7117, "step": 12740 }, { "epoch": 0.37117840416557585, "grad_norm": 12.5, "learning_rate": 8.762741201794186e-06, "loss": 1.6783, "step": 12760 }, { "epoch": 0.37176018849812376, "grad_norm": 12.9375, "learning_rate": 8.76080192012157e-06, "loss": 1.6464, "step": 12780 }, { "epoch": 0.37234197283067166, "grad_norm": 11.3125, "learning_rate": 8.758862638448956e-06, "loss": 1.7115, "step": 12800 }, { "epoch": 0.37292375716321957, "grad_norm": 11.0625, "learning_rate": 8.75692335677634e-06, "loss": 1.6268, "step": 12820 }, { "epoch": 0.37350554149576753, "grad_norm": 11.1875, "learning_rate": 8.754984075103726e-06, "loss": 1.5796, "step": 12840 }, { "epoch": 0.37408732582831544, "grad_norm": 13.3125, "learning_rate": 8.753044793431111e-06, "loss": 1.6056, "step": 12860 }, { "epoch": 0.37466911016086335, "grad_norm": 15.9375, "learning_rate": 8.751105511758496e-06, "loss": 1.6311, "step": 12880 }, { "epoch": 0.3752508944934113, "grad_norm": 14.8125, "learning_rate": 8.749166230085881e-06, "loss": 1.6125, "step": 12900 }, { "epoch": 0.3758326788259592, "grad_norm": 11.4375, "learning_rate": 8.747226948413266e-06, "loss": 1.6119, "step": 12920 }, { "epoch": 0.37641446315850713, "grad_norm": 14.5625, "learning_rate": 8.74528766674065e-06, "loss": 1.6739, "step": 12940 }, { "epoch": 0.3769962474910551, "grad_norm": 9.25, "learning_rate": 8.743348385068035e-06, "loss": 1.7119, "step": 12960 }, { "epoch": 0.377578031823603, "grad_norm": 13.0, "learning_rate": 8.74140910339542e-06, "loss": 1.6802, "step": 12980 }, { "epoch": 0.3781598161561509, "grad_norm": 12.1875, "learning_rate": 8.739469821722805e-06, "loss": 1.5522, "step": 13000 }, { "epoch": 0.3787416004886988, "grad_norm": 16.375, "learning_rate": 8.73753054005019e-06, "loss": 1.6771, "step": 13020 }, { "epoch": 0.3793233848212468, "grad_norm": 10.5625, "learning_rate": 8.735591258377575e-06, "loss": 1.6635, "step": 13040 }, { "epoch": 0.3799051691537947, "grad_norm": 10.8125, "learning_rate": 8.73365197670496e-06, "loss": 1.6157, "step": 13060 }, { "epoch": 0.3804869534863426, "grad_norm": 11.125, "learning_rate": 8.731712695032345e-06, "loss": 1.6574, "step": 13080 }, { "epoch": 0.38106873781889056, "grad_norm": 15.8125, "learning_rate": 8.72977341335973e-06, "loss": 1.6298, "step": 13100 }, { "epoch": 0.38165052215143846, "grad_norm": 13.0, "learning_rate": 8.727834131687116e-06, "loss": 1.749, "step": 13120 }, { "epoch": 0.3822323064839864, "grad_norm": 16.625, "learning_rate": 8.7258948500145e-06, "loss": 1.736, "step": 13140 }, { "epoch": 0.38281409081653434, "grad_norm": 11.875, "learning_rate": 8.723955568341886e-06, "loss": 1.6702, "step": 13160 }, { "epoch": 0.38339587514908224, "grad_norm": 12.75, "learning_rate": 8.722016286669271e-06, "loss": 1.6749, "step": 13180 }, { "epoch": 0.38397765948163015, "grad_norm": 12.875, "learning_rate": 8.720077004996656e-06, "loss": 1.7487, "step": 13200 }, { "epoch": 0.38455944381417806, "grad_norm": 12.5625, "learning_rate": 8.718137723324041e-06, "loss": 1.6517, "step": 13220 }, { "epoch": 0.385141228146726, "grad_norm": 11.5, "learning_rate": 8.716198441651426e-06, "loss": 1.6829, "step": 13240 }, { "epoch": 0.38572301247927393, "grad_norm": 12.875, "learning_rate": 8.714259159978811e-06, "loss": 1.6287, "step": 13260 }, { "epoch": 0.38630479681182184, "grad_norm": 13.1875, "learning_rate": 8.712319878306196e-06, "loss": 1.7027, "step": 13280 }, { "epoch": 0.3868865811443698, "grad_norm": 12.75, "learning_rate": 8.710380596633581e-06, "loss": 1.762, "step": 13300 }, { "epoch": 0.3874683654769177, "grad_norm": 15.3125, "learning_rate": 8.708441314960967e-06, "loss": 1.6645, "step": 13320 }, { "epoch": 0.3880501498094656, "grad_norm": 14.0, "learning_rate": 8.706502033288352e-06, "loss": 1.6303, "step": 13340 }, { "epoch": 0.3886319341420136, "grad_norm": 11.4375, "learning_rate": 8.704562751615737e-06, "loss": 1.6327, "step": 13360 }, { "epoch": 0.3892137184745615, "grad_norm": 10.8125, "learning_rate": 8.702623469943122e-06, "loss": 1.7711, "step": 13380 }, { "epoch": 0.3897955028071094, "grad_norm": 12.75, "learning_rate": 8.700684188270507e-06, "loss": 1.697, "step": 13400 }, { "epoch": 0.3903772871396573, "grad_norm": 12.1875, "learning_rate": 8.698744906597892e-06, "loss": 1.6974, "step": 13420 }, { "epoch": 0.39095907147220527, "grad_norm": 10.875, "learning_rate": 8.696805624925277e-06, "loss": 1.6802, "step": 13440 }, { "epoch": 0.3915408558047532, "grad_norm": 7.59375, "learning_rate": 8.694866343252662e-06, "loss": 1.6845, "step": 13460 }, { "epoch": 0.3921226401373011, "grad_norm": 12.9375, "learning_rate": 8.692927061580047e-06, "loss": 1.7635, "step": 13480 }, { "epoch": 0.39270442446984904, "grad_norm": 13.5, "learning_rate": 8.690987779907432e-06, "loss": 1.6645, "step": 13500 }, { "epoch": 0.39328620880239695, "grad_norm": 13.375, "learning_rate": 8.689048498234818e-06, "loss": 1.6436, "step": 13520 }, { "epoch": 0.39386799313494486, "grad_norm": 11.75, "learning_rate": 8.687109216562203e-06, "loss": 1.6567, "step": 13540 }, { "epoch": 0.3944497774674928, "grad_norm": 11.0625, "learning_rate": 8.685169934889588e-06, "loss": 1.6634, "step": 13560 }, { "epoch": 0.39503156180004073, "grad_norm": 11.3125, "learning_rate": 8.683230653216973e-06, "loss": 1.6337, "step": 13580 }, { "epoch": 0.39561334613258864, "grad_norm": 10.75, "learning_rate": 8.681291371544358e-06, "loss": 1.6324, "step": 13600 }, { "epoch": 0.39619513046513655, "grad_norm": 12.0625, "learning_rate": 8.679352089871743e-06, "loss": 1.7301, "step": 13620 }, { "epoch": 0.3967769147976845, "grad_norm": 12.1875, "learning_rate": 8.677412808199128e-06, "loss": 1.6491, "step": 13640 }, { "epoch": 0.3973586991302324, "grad_norm": 10.5625, "learning_rate": 8.675473526526513e-06, "loss": 1.6356, "step": 13660 }, { "epoch": 0.3979404834627803, "grad_norm": 13.5625, "learning_rate": 8.673534244853898e-06, "loss": 1.6638, "step": 13680 }, { "epoch": 0.3985222677953283, "grad_norm": 12.5, "learning_rate": 8.671594963181283e-06, "loss": 1.6306, "step": 13700 }, { "epoch": 0.3991040521278762, "grad_norm": 11.875, "learning_rate": 8.669655681508669e-06, "loss": 1.6401, "step": 13720 }, { "epoch": 0.3996858364604241, "grad_norm": 14.1875, "learning_rate": 8.667716399836054e-06, "loss": 1.7083, "step": 13740 }, { "epoch": 0.40026762079297207, "grad_norm": 11.0, "learning_rate": 8.665777118163439e-06, "loss": 1.6409, "step": 13760 }, { "epoch": 0.40084940512552, "grad_norm": 12.625, "learning_rate": 8.663837836490824e-06, "loss": 1.6309, "step": 13780 }, { "epoch": 0.4014311894580679, "grad_norm": 14.0625, "learning_rate": 8.661898554818209e-06, "loss": 1.6464, "step": 13800 }, { "epoch": 0.40201297379061585, "grad_norm": 12.5625, "learning_rate": 8.659959273145594e-06, "loss": 1.6834, "step": 13820 }, { "epoch": 0.40259475812316375, "grad_norm": 10.9375, "learning_rate": 8.658019991472979e-06, "loss": 1.5307, "step": 13840 }, { "epoch": 0.40317654245571166, "grad_norm": 12.0, "learning_rate": 8.656080709800364e-06, "loss": 1.6924, "step": 13860 }, { "epoch": 0.40375832678825957, "grad_norm": 14.0, "learning_rate": 8.65414142812775e-06, "loss": 1.687, "step": 13880 }, { "epoch": 0.40434011112080753, "grad_norm": 12.5625, "learning_rate": 8.652202146455134e-06, "loss": 1.6729, "step": 13900 }, { "epoch": 0.40492189545335544, "grad_norm": 10.6875, "learning_rate": 8.65026286478252e-06, "loss": 1.6525, "step": 13920 }, { "epoch": 0.40550367978590335, "grad_norm": 11.8125, "learning_rate": 8.648323583109905e-06, "loss": 1.6706, "step": 13940 }, { "epoch": 0.4060854641184513, "grad_norm": 16.25, "learning_rate": 8.64638430143729e-06, "loss": 1.7039, "step": 13960 }, { "epoch": 0.4066672484509992, "grad_norm": 13.9375, "learning_rate": 8.644445019764675e-06, "loss": 1.6011, "step": 13980 }, { "epoch": 0.4072490327835471, "grad_norm": 10.5625, "learning_rate": 8.64250573809206e-06, "loss": 1.6004, "step": 14000 }, { "epoch": 0.4078308171160951, "grad_norm": 13.875, "learning_rate": 8.640566456419445e-06, "loss": 1.6513, "step": 14020 }, { "epoch": 0.408412601448643, "grad_norm": 13.125, "learning_rate": 8.63862717474683e-06, "loss": 1.637, "step": 14040 }, { "epoch": 0.4089943857811909, "grad_norm": 11.375, "learning_rate": 8.636687893074215e-06, "loss": 1.6971, "step": 14060 }, { "epoch": 0.4095761701137388, "grad_norm": 11.375, "learning_rate": 8.6347486114016e-06, "loss": 1.6934, "step": 14080 }, { "epoch": 0.4101579544462868, "grad_norm": 14.1875, "learning_rate": 8.632809329728985e-06, "loss": 1.6761, "step": 14100 }, { "epoch": 0.4107397387788347, "grad_norm": 11.125, "learning_rate": 8.63087004805637e-06, "loss": 1.7236, "step": 14120 }, { "epoch": 0.4113215231113826, "grad_norm": 12.8125, "learning_rate": 8.628930766383756e-06, "loss": 1.7004, "step": 14140 }, { "epoch": 0.41190330744393056, "grad_norm": 11.625, "learning_rate": 8.62699148471114e-06, "loss": 1.5457, "step": 14160 }, { "epoch": 0.41248509177647846, "grad_norm": 14.375, "learning_rate": 8.625052203038526e-06, "loss": 1.5415, "step": 14180 }, { "epoch": 0.41306687610902637, "grad_norm": 10.5625, "learning_rate": 8.623112921365911e-06, "loss": 1.7406, "step": 14200 }, { "epoch": 0.41364866044157433, "grad_norm": 19.625, "learning_rate": 8.621173639693296e-06, "loss": 1.675, "step": 14220 }, { "epoch": 0.41423044477412224, "grad_norm": 12.875, "learning_rate": 8.619234358020681e-06, "loss": 1.7145, "step": 14240 }, { "epoch": 0.41481222910667015, "grad_norm": 10.75, "learning_rate": 8.617295076348066e-06, "loss": 1.6529, "step": 14260 }, { "epoch": 0.41539401343921806, "grad_norm": 11.625, "learning_rate": 8.615355794675451e-06, "loss": 1.644, "step": 14280 }, { "epoch": 0.415975797771766, "grad_norm": 13.3125, "learning_rate": 8.613416513002836e-06, "loss": 1.6978, "step": 14300 }, { "epoch": 0.41655758210431393, "grad_norm": 13.3125, "learning_rate": 8.611477231330221e-06, "loss": 1.683, "step": 14320 }, { "epoch": 0.41713936643686184, "grad_norm": 11.25, "learning_rate": 8.609537949657607e-06, "loss": 1.6661, "step": 14340 }, { "epoch": 0.4177211507694098, "grad_norm": 12.75, "learning_rate": 8.607598667984992e-06, "loss": 1.6891, "step": 14360 }, { "epoch": 0.4183029351019577, "grad_norm": 13.5, "learning_rate": 8.605659386312377e-06, "loss": 1.6421, "step": 14380 }, { "epoch": 0.4188847194345056, "grad_norm": 10.8125, "learning_rate": 8.603720104639762e-06, "loss": 1.6044, "step": 14400 }, { "epoch": 0.4194665037670536, "grad_norm": 13.3125, "learning_rate": 8.601780822967147e-06, "loss": 1.6135, "step": 14420 }, { "epoch": 0.4200482880996015, "grad_norm": 12.0, "learning_rate": 8.599841541294532e-06, "loss": 1.6754, "step": 14440 }, { "epoch": 0.4206300724321494, "grad_norm": 13.25, "learning_rate": 8.597902259621917e-06, "loss": 1.6302, "step": 14460 }, { "epoch": 0.4212118567646973, "grad_norm": 11.0625, "learning_rate": 8.595962977949302e-06, "loss": 1.5996, "step": 14480 }, { "epoch": 0.42179364109724526, "grad_norm": 12.25, "learning_rate": 8.594023696276687e-06, "loss": 1.6721, "step": 14500 }, { "epoch": 0.42237542542979317, "grad_norm": 11.8125, "learning_rate": 8.592084414604072e-06, "loss": 1.6192, "step": 14520 }, { "epoch": 0.4229572097623411, "grad_norm": 14.0, "learning_rate": 8.590145132931458e-06, "loss": 1.5439, "step": 14540 }, { "epoch": 0.42353899409488904, "grad_norm": 12.5, "learning_rate": 8.588205851258841e-06, "loss": 1.661, "step": 14560 }, { "epoch": 0.42412077842743695, "grad_norm": 11.4375, "learning_rate": 8.586266569586226e-06, "loss": 1.6487, "step": 14580 }, { "epoch": 0.42470256275998486, "grad_norm": 15.6875, "learning_rate": 8.584327287913611e-06, "loss": 1.6176, "step": 14600 }, { "epoch": 0.4252843470925328, "grad_norm": 12.5, "learning_rate": 8.582388006240996e-06, "loss": 1.6658, "step": 14620 }, { "epoch": 0.42586613142508073, "grad_norm": 12.4375, "learning_rate": 8.580448724568381e-06, "loss": 1.6275, "step": 14640 }, { "epoch": 0.42644791575762864, "grad_norm": 13.4375, "learning_rate": 8.578509442895766e-06, "loss": 1.6152, "step": 14660 }, { "epoch": 0.42702970009017654, "grad_norm": 11.5, "learning_rate": 8.576570161223152e-06, "loss": 1.6167, "step": 14680 }, { "epoch": 0.4276114844227245, "grad_norm": 13.5, "learning_rate": 8.574630879550537e-06, "loss": 1.605, "step": 14700 }, { "epoch": 0.4281932687552724, "grad_norm": 16.0, "learning_rate": 8.572691597877922e-06, "loss": 1.6955, "step": 14720 }, { "epoch": 0.4287750530878203, "grad_norm": 13.6875, "learning_rate": 8.570752316205307e-06, "loss": 1.5291, "step": 14740 }, { "epoch": 0.4293568374203683, "grad_norm": 12.125, "learning_rate": 8.568813034532692e-06, "loss": 1.7469, "step": 14760 }, { "epoch": 0.4299386217529162, "grad_norm": 11.5625, "learning_rate": 8.566873752860077e-06, "loss": 1.7228, "step": 14780 }, { "epoch": 0.4305204060854641, "grad_norm": 14.75, "learning_rate": 8.564934471187462e-06, "loss": 1.6517, "step": 14800 }, { "epoch": 0.43110219041801207, "grad_norm": 11.0, "learning_rate": 8.562995189514847e-06, "loss": 1.5406, "step": 14820 }, { "epoch": 0.43168397475056, "grad_norm": 12.25, "learning_rate": 8.561055907842232e-06, "loss": 1.6401, "step": 14840 }, { "epoch": 0.4322657590831079, "grad_norm": 12.0625, "learning_rate": 8.559116626169617e-06, "loss": 1.6032, "step": 14860 }, { "epoch": 0.4328475434156558, "grad_norm": 12.5, "learning_rate": 8.557177344497002e-06, "loss": 1.7027, "step": 14880 }, { "epoch": 0.43342932774820375, "grad_norm": 11.8125, "learning_rate": 8.555238062824388e-06, "loss": 1.6566, "step": 14900 }, { "epoch": 0.43401111208075166, "grad_norm": 13.1875, "learning_rate": 8.553298781151773e-06, "loss": 1.6975, "step": 14920 }, { "epoch": 0.43459289641329957, "grad_norm": 11.8125, "learning_rate": 8.551359499479158e-06, "loss": 1.6332, "step": 14940 }, { "epoch": 0.43517468074584753, "grad_norm": 12.5, "learning_rate": 8.549420217806543e-06, "loss": 1.6447, "step": 14960 }, { "epoch": 0.43575646507839544, "grad_norm": 13.0625, "learning_rate": 8.547480936133928e-06, "loss": 1.7399, "step": 14980 }, { "epoch": 0.43633824941094335, "grad_norm": 12.6875, "learning_rate": 8.545541654461313e-06, "loss": 1.7293, "step": 15000 }, { "epoch": 0.4369200337434913, "grad_norm": 13.0625, "learning_rate": 8.543602372788698e-06, "loss": 1.6866, "step": 15020 }, { "epoch": 0.4375018180760392, "grad_norm": 13.9375, "learning_rate": 8.541663091116083e-06, "loss": 1.6139, "step": 15040 }, { "epoch": 0.4380836024085871, "grad_norm": 12.9375, "learning_rate": 8.539723809443468e-06, "loss": 1.6182, "step": 15060 }, { "epoch": 0.4386653867411351, "grad_norm": 13.375, "learning_rate": 8.537784527770853e-06, "loss": 1.6955, "step": 15080 }, { "epoch": 0.439247171073683, "grad_norm": 12.6875, "learning_rate": 8.535845246098239e-06, "loss": 1.5942, "step": 15100 }, { "epoch": 0.4398289554062309, "grad_norm": 12.5, "learning_rate": 8.533905964425624e-06, "loss": 1.6648, "step": 15120 }, { "epoch": 0.4404107397387788, "grad_norm": 12.25, "learning_rate": 8.531966682753009e-06, "loss": 1.6914, "step": 15140 }, { "epoch": 0.4409925240713268, "grad_norm": 11.9375, "learning_rate": 8.530027401080394e-06, "loss": 1.6168, "step": 15160 }, { "epoch": 0.4415743084038747, "grad_norm": 11.5625, "learning_rate": 8.528088119407779e-06, "loss": 1.7224, "step": 15180 }, { "epoch": 0.4421560927364226, "grad_norm": 13.6875, "learning_rate": 8.526148837735164e-06, "loss": 1.6296, "step": 15200 }, { "epoch": 0.44273787706897055, "grad_norm": 12.25, "learning_rate": 8.524209556062549e-06, "loss": 1.5624, "step": 15220 }, { "epoch": 0.44331966140151846, "grad_norm": 12.5625, "learning_rate": 8.522270274389934e-06, "loss": 1.6387, "step": 15240 }, { "epoch": 0.44390144573406637, "grad_norm": 14.375, "learning_rate": 8.52033099271732e-06, "loss": 1.6439, "step": 15260 }, { "epoch": 0.44448323006661433, "grad_norm": 11.1875, "learning_rate": 8.518391711044704e-06, "loss": 1.697, "step": 15280 }, { "epoch": 0.44506501439916224, "grad_norm": 13.4375, "learning_rate": 8.51645242937209e-06, "loss": 1.7002, "step": 15300 }, { "epoch": 0.44564679873171015, "grad_norm": 13.875, "learning_rate": 8.514513147699475e-06, "loss": 1.6779, "step": 15320 }, { "epoch": 0.44622858306425806, "grad_norm": 12.8125, "learning_rate": 8.51257386602686e-06, "loss": 1.6922, "step": 15340 }, { "epoch": 0.446810367396806, "grad_norm": 11.3125, "learning_rate": 8.510634584354245e-06, "loss": 1.6771, "step": 15360 }, { "epoch": 0.4473921517293539, "grad_norm": 14.1875, "learning_rate": 8.50869530268163e-06, "loss": 1.7192, "step": 15380 }, { "epoch": 0.44797393606190183, "grad_norm": 15.25, "learning_rate": 8.506756021009015e-06, "loss": 1.6994, "step": 15400 }, { "epoch": 0.4485557203944498, "grad_norm": 11.125, "learning_rate": 8.5048167393364e-06, "loss": 1.7224, "step": 15420 }, { "epoch": 0.4491375047269977, "grad_norm": 13.5625, "learning_rate": 8.502877457663785e-06, "loss": 1.596, "step": 15440 }, { "epoch": 0.4497192890595456, "grad_norm": 13.375, "learning_rate": 8.50093817599117e-06, "loss": 1.626, "step": 15460 }, { "epoch": 0.4503010733920936, "grad_norm": 11.125, "learning_rate": 8.498998894318555e-06, "loss": 1.6653, "step": 15480 }, { "epoch": 0.4508828577246415, "grad_norm": 11.375, "learning_rate": 8.49705961264594e-06, "loss": 1.6906, "step": 15500 }, { "epoch": 0.4514646420571894, "grad_norm": 12.1875, "learning_rate": 8.495120330973326e-06, "loss": 1.5862, "step": 15520 }, { "epoch": 0.4520464263897373, "grad_norm": 12.125, "learning_rate": 8.49318104930071e-06, "loss": 1.624, "step": 15540 }, { "epoch": 0.45262821072228526, "grad_norm": 10.875, "learning_rate": 8.491241767628096e-06, "loss": 1.6233, "step": 15560 }, { "epoch": 0.45320999505483317, "grad_norm": 11.0, "learning_rate": 8.489302485955481e-06, "loss": 1.6777, "step": 15580 }, { "epoch": 0.4537917793873811, "grad_norm": 12.0, "learning_rate": 8.487363204282866e-06, "loss": 1.6539, "step": 15600 }, { "epoch": 0.45437356371992904, "grad_norm": 12.25, "learning_rate": 8.485423922610251e-06, "loss": 1.6355, "step": 15620 }, { "epoch": 0.45495534805247695, "grad_norm": 13.3125, "learning_rate": 8.483484640937636e-06, "loss": 1.7094, "step": 15640 }, { "epoch": 0.45553713238502486, "grad_norm": 12.625, "learning_rate": 8.481545359265021e-06, "loss": 1.6191, "step": 15660 }, { "epoch": 0.4561189167175728, "grad_norm": 12.75, "learning_rate": 8.479606077592406e-06, "loss": 1.6599, "step": 15680 }, { "epoch": 0.4567007010501207, "grad_norm": 11.4375, "learning_rate": 8.477666795919791e-06, "loss": 1.6194, "step": 15700 }, { "epoch": 0.45728248538266864, "grad_norm": 12.375, "learning_rate": 8.475727514247177e-06, "loss": 1.7066, "step": 15720 }, { "epoch": 0.45786426971521654, "grad_norm": 13.5625, "learning_rate": 8.473788232574562e-06, "loss": 1.6937, "step": 15740 }, { "epoch": 0.4584460540477645, "grad_norm": 10.625, "learning_rate": 8.471848950901947e-06, "loss": 1.6028, "step": 15760 }, { "epoch": 0.4590278383803124, "grad_norm": 12.6875, "learning_rate": 8.469909669229332e-06, "loss": 1.7428, "step": 15780 }, { "epoch": 0.4596096227128603, "grad_norm": 12.75, "learning_rate": 8.467970387556717e-06, "loss": 1.6265, "step": 15800 }, { "epoch": 0.4601914070454083, "grad_norm": 13.5, "learning_rate": 8.466031105884102e-06, "loss": 1.7238, "step": 15820 }, { "epoch": 0.4607731913779562, "grad_norm": 17.5, "learning_rate": 8.464091824211487e-06, "loss": 1.6624, "step": 15840 }, { "epoch": 0.4613549757105041, "grad_norm": 8.875, "learning_rate": 8.462152542538872e-06, "loss": 1.6134, "step": 15860 }, { "epoch": 0.46193676004305206, "grad_norm": 11.4375, "learning_rate": 8.460213260866257e-06, "loss": 1.6501, "step": 15880 }, { "epoch": 0.46251854437559997, "grad_norm": 15.25, "learning_rate": 8.458273979193642e-06, "loss": 1.6335, "step": 15900 }, { "epoch": 0.4631003287081479, "grad_norm": 13.25, "learning_rate": 8.456334697521028e-06, "loss": 1.6504, "step": 15920 }, { "epoch": 0.4636821130406958, "grad_norm": 13.4375, "learning_rate": 8.454395415848413e-06, "loss": 1.5638, "step": 15940 }, { "epoch": 0.46426389737324375, "grad_norm": 13.5625, "learning_rate": 8.452456134175798e-06, "loss": 1.7835, "step": 15960 }, { "epoch": 0.46484568170579166, "grad_norm": 13.5625, "learning_rate": 8.450516852503183e-06, "loss": 1.6515, "step": 15980 }, { "epoch": 0.46542746603833957, "grad_norm": 12.0, "learning_rate": 8.448577570830568e-06, "loss": 1.6011, "step": 16000 }, { "epoch": 0.46600925037088753, "grad_norm": 14.6875, "learning_rate": 8.446638289157953e-06, "loss": 1.637, "step": 16020 }, { "epoch": 0.46659103470343544, "grad_norm": 12.125, "learning_rate": 8.444699007485338e-06, "loss": 1.7068, "step": 16040 }, { "epoch": 0.46717281903598334, "grad_norm": 12.875, "learning_rate": 8.442759725812723e-06, "loss": 1.6211, "step": 16060 }, { "epoch": 0.4677546033685313, "grad_norm": 13.6875, "learning_rate": 8.440820444140108e-06, "loss": 1.5866, "step": 16080 }, { "epoch": 0.4683363877010792, "grad_norm": 11.8125, "learning_rate": 8.438881162467493e-06, "loss": 1.721, "step": 16100 }, { "epoch": 0.4689181720336271, "grad_norm": 10.9375, "learning_rate": 8.436941880794879e-06, "loss": 1.6362, "step": 16120 }, { "epoch": 0.4694999563661751, "grad_norm": 12.1875, "learning_rate": 8.435002599122264e-06, "loss": 1.6794, "step": 16140 }, { "epoch": 0.470081740698723, "grad_norm": 14.375, "learning_rate": 8.433063317449649e-06, "loss": 1.6712, "step": 16160 }, { "epoch": 0.4706635250312709, "grad_norm": 14.5625, "learning_rate": 8.431124035777034e-06, "loss": 1.6059, "step": 16180 }, { "epoch": 0.4712453093638188, "grad_norm": 10.8125, "learning_rate": 8.429184754104417e-06, "loss": 1.6212, "step": 16200 }, { "epoch": 0.4718270936963668, "grad_norm": 13.9375, "learning_rate": 8.427245472431802e-06, "loss": 1.6766, "step": 16220 }, { "epoch": 0.4724088780289147, "grad_norm": 11.625, "learning_rate": 8.425306190759187e-06, "loss": 1.6919, "step": 16240 }, { "epoch": 0.4729906623614626, "grad_norm": 12.0, "learning_rate": 8.423366909086573e-06, "loss": 1.6914, "step": 16260 }, { "epoch": 0.47357244669401055, "grad_norm": 11.5, "learning_rate": 8.421427627413958e-06, "loss": 1.7106, "step": 16280 }, { "epoch": 0.47415423102655846, "grad_norm": 12.3125, "learning_rate": 8.419488345741343e-06, "loss": 1.6553, "step": 16300 }, { "epoch": 0.47473601535910637, "grad_norm": 11.9375, "learning_rate": 8.417549064068728e-06, "loss": 1.6824, "step": 16320 }, { "epoch": 0.47531779969165433, "grad_norm": 14.0, "learning_rate": 8.415609782396113e-06, "loss": 1.6005, "step": 16340 }, { "epoch": 0.47589958402420224, "grad_norm": 12.8125, "learning_rate": 8.413670500723498e-06, "loss": 1.6092, "step": 16360 }, { "epoch": 0.47648136835675015, "grad_norm": 21.25, "learning_rate": 8.411731219050883e-06, "loss": 1.6701, "step": 16380 }, { "epoch": 0.47706315268929805, "grad_norm": 14.625, "learning_rate": 8.409791937378268e-06, "loss": 1.6229, "step": 16400 }, { "epoch": 0.477644937021846, "grad_norm": 11.375, "learning_rate": 8.407852655705653e-06, "loss": 1.5859, "step": 16420 }, { "epoch": 0.4782267213543939, "grad_norm": 12.0625, "learning_rate": 8.405913374033038e-06, "loss": 1.6195, "step": 16440 }, { "epoch": 0.47880850568694183, "grad_norm": 10.6875, "learning_rate": 8.403974092360423e-06, "loss": 1.7007, "step": 16460 }, { "epoch": 0.4793902900194898, "grad_norm": 14.125, "learning_rate": 8.402034810687809e-06, "loss": 1.6735, "step": 16480 }, { "epoch": 0.4799720743520377, "grad_norm": 12.5, "learning_rate": 8.400095529015194e-06, "loss": 1.6816, "step": 16500 }, { "epoch": 0.4805538586845856, "grad_norm": 10.875, "learning_rate": 8.398156247342579e-06, "loss": 1.6985, "step": 16520 }, { "epoch": 0.4811356430171336, "grad_norm": 11.75, "learning_rate": 8.396216965669964e-06, "loss": 1.7448, "step": 16540 }, { "epoch": 0.4817174273496815, "grad_norm": 13.375, "learning_rate": 8.394277683997349e-06, "loss": 1.6185, "step": 16560 }, { "epoch": 0.4822992116822294, "grad_norm": 14.875, "learning_rate": 8.392338402324734e-06, "loss": 1.6197, "step": 16580 }, { "epoch": 0.4828809960147773, "grad_norm": 13.75, "learning_rate": 8.39039912065212e-06, "loss": 1.6744, "step": 16600 }, { "epoch": 0.48346278034732526, "grad_norm": 11.875, "learning_rate": 8.388459838979504e-06, "loss": 1.6772, "step": 16620 }, { "epoch": 0.48404456467987317, "grad_norm": 13.625, "learning_rate": 8.38652055730689e-06, "loss": 1.67, "step": 16640 }, { "epoch": 0.4846263490124211, "grad_norm": 13.0625, "learning_rate": 8.384581275634274e-06, "loss": 1.6667, "step": 16660 }, { "epoch": 0.48520813334496904, "grad_norm": 14.3125, "learning_rate": 8.38264199396166e-06, "loss": 1.7014, "step": 16680 }, { "epoch": 0.48578991767751695, "grad_norm": 14.375, "learning_rate": 8.380702712289045e-06, "loss": 1.6499, "step": 16700 }, { "epoch": 0.48637170201006485, "grad_norm": 14.0625, "learning_rate": 8.37876343061643e-06, "loss": 1.6073, "step": 16720 }, { "epoch": 0.4869534863426128, "grad_norm": 13.8125, "learning_rate": 8.376824148943815e-06, "loss": 1.6416, "step": 16740 }, { "epoch": 0.4875352706751607, "grad_norm": 15.5625, "learning_rate": 8.374884867271198e-06, "loss": 1.6485, "step": 16760 }, { "epoch": 0.48811705500770863, "grad_norm": 12.3125, "learning_rate": 8.372945585598583e-06, "loss": 1.7104, "step": 16780 }, { "epoch": 0.48869883934025654, "grad_norm": 10.6875, "learning_rate": 8.371006303925968e-06, "loss": 1.5394, "step": 16800 }, { "epoch": 0.4892806236728045, "grad_norm": 14.0, "learning_rate": 8.369067022253354e-06, "loss": 1.5488, "step": 16820 }, { "epoch": 0.4898624080053524, "grad_norm": 12.625, "learning_rate": 8.367127740580739e-06, "loss": 1.5977, "step": 16840 }, { "epoch": 0.4904441923379003, "grad_norm": 11.875, "learning_rate": 8.365188458908124e-06, "loss": 1.5608, "step": 16860 }, { "epoch": 0.4910259766704483, "grad_norm": 13.3125, "learning_rate": 8.363249177235509e-06, "loss": 1.6741, "step": 16880 }, { "epoch": 0.4916077610029962, "grad_norm": 13.3125, "learning_rate": 8.361309895562894e-06, "loss": 1.6442, "step": 16900 }, { "epoch": 0.4921895453355441, "grad_norm": 12.25, "learning_rate": 8.359370613890279e-06, "loss": 1.6331, "step": 16920 }, { "epoch": 0.49277132966809206, "grad_norm": 11.75, "learning_rate": 8.357431332217664e-06, "loss": 1.627, "step": 16940 }, { "epoch": 0.49335311400063997, "grad_norm": 12.6875, "learning_rate": 8.35549205054505e-06, "loss": 1.6895, "step": 16960 }, { "epoch": 0.4939348983331879, "grad_norm": 10.0625, "learning_rate": 8.353552768872434e-06, "loss": 1.6495, "step": 16980 }, { "epoch": 0.4945166826657358, "grad_norm": 12.25, "learning_rate": 8.35161348719982e-06, "loss": 1.6889, "step": 17000 }, { "epoch": 0.49509846699828375, "grad_norm": 14.25, "learning_rate": 8.349674205527205e-06, "loss": 1.6643, "step": 17020 }, { "epoch": 0.49568025133083166, "grad_norm": 13.625, "learning_rate": 8.34773492385459e-06, "loss": 1.572, "step": 17040 }, { "epoch": 0.49626203566337956, "grad_norm": 11.4375, "learning_rate": 8.345795642181975e-06, "loss": 1.6003, "step": 17060 }, { "epoch": 0.4968438199959275, "grad_norm": 13.25, "learning_rate": 8.34385636050936e-06, "loss": 1.7059, "step": 17080 }, { "epoch": 0.49742560432847543, "grad_norm": 12.75, "learning_rate": 8.341917078836745e-06, "loss": 1.5876, "step": 17100 }, { "epoch": 0.49800738866102334, "grad_norm": 12.0, "learning_rate": 8.33997779716413e-06, "loss": 1.6523, "step": 17120 }, { "epoch": 0.4985891729935713, "grad_norm": 12.8125, "learning_rate": 8.338038515491515e-06, "loss": 1.7046, "step": 17140 }, { "epoch": 0.4991709573261192, "grad_norm": 11.4375, "learning_rate": 8.3360992338189e-06, "loss": 1.5631, "step": 17160 }, { "epoch": 0.4997527416586671, "grad_norm": 13.3125, "learning_rate": 8.334159952146285e-06, "loss": 1.585, "step": 17180 }, { "epoch": 0.5003345259912151, "grad_norm": 13.5, "learning_rate": 8.33222067047367e-06, "loss": 1.7361, "step": 17200 }, { "epoch": 0.5009163103237629, "grad_norm": 14.875, "learning_rate": 8.330281388801056e-06, "loss": 1.6168, "step": 17220 }, { "epoch": 0.5014980946563109, "grad_norm": 11.6875, "learning_rate": 8.32834210712844e-06, "loss": 1.6471, "step": 17240 }, { "epoch": 0.5020798789888589, "grad_norm": 12.3125, "learning_rate": 8.326402825455826e-06, "loss": 1.6724, "step": 17260 }, { "epoch": 0.5026616633214067, "grad_norm": 11.1875, "learning_rate": 8.32446354378321e-06, "loss": 1.6533, "step": 17280 }, { "epoch": 0.5032434476539547, "grad_norm": 10.9375, "learning_rate": 8.322524262110596e-06, "loss": 1.5692, "step": 17300 }, { "epoch": 0.5038252319865026, "grad_norm": 13.75, "learning_rate": 8.320584980437981e-06, "loss": 1.6431, "step": 17320 }, { "epoch": 0.5044070163190505, "grad_norm": 12.1875, "learning_rate": 8.318645698765366e-06, "loss": 1.6197, "step": 17340 }, { "epoch": 0.5049888006515985, "grad_norm": 14.5, "learning_rate": 8.316706417092751e-06, "loss": 1.6078, "step": 17360 }, { "epoch": 0.5055705849841464, "grad_norm": 12.75, "learning_rate": 8.314767135420136e-06, "loss": 1.7085, "step": 17380 }, { "epoch": 0.5061523693166943, "grad_norm": 14.9375, "learning_rate": 8.312827853747521e-06, "loss": 1.5984, "step": 17400 }, { "epoch": 0.5067341536492422, "grad_norm": 10.75, "learning_rate": 8.310888572074906e-06, "loss": 1.5446, "step": 17420 }, { "epoch": 0.5073159379817902, "grad_norm": 10.6875, "learning_rate": 8.308949290402292e-06, "loss": 1.6235, "step": 17440 }, { "epoch": 0.507897722314338, "grad_norm": 14.4375, "learning_rate": 8.307010008729677e-06, "loss": 1.6053, "step": 17460 }, { "epoch": 0.508479506646886, "grad_norm": 12.5625, "learning_rate": 8.305070727057062e-06, "loss": 1.5673, "step": 17480 }, { "epoch": 0.509061290979434, "grad_norm": 12.125, "learning_rate": 8.303131445384447e-06, "loss": 1.6572, "step": 17500 }, { "epoch": 0.5096430753119818, "grad_norm": 14.0625, "learning_rate": 8.301192163711832e-06, "loss": 1.6572, "step": 17520 }, { "epoch": 0.5102248596445298, "grad_norm": 12.9375, "learning_rate": 8.299252882039217e-06, "loss": 1.6663, "step": 17540 }, { "epoch": 0.5108066439770776, "grad_norm": 11.8125, "learning_rate": 8.297313600366602e-06, "loss": 1.6452, "step": 17560 }, { "epoch": 0.5113884283096256, "grad_norm": 14.625, "learning_rate": 8.295374318693987e-06, "loss": 1.6668, "step": 17580 }, { "epoch": 0.5119702126421736, "grad_norm": 10.8125, "learning_rate": 8.293435037021372e-06, "loss": 1.5655, "step": 17600 }, { "epoch": 0.5125519969747214, "grad_norm": 12.6875, "learning_rate": 8.291495755348757e-06, "loss": 1.761, "step": 17620 }, { "epoch": 0.5131337813072694, "grad_norm": 12.75, "learning_rate": 8.289556473676143e-06, "loss": 1.6808, "step": 17640 }, { "epoch": 0.5137155656398174, "grad_norm": 16.0, "learning_rate": 8.287617192003528e-06, "loss": 1.6404, "step": 17660 }, { "epoch": 0.5142973499723652, "grad_norm": 11.4375, "learning_rate": 8.285677910330913e-06, "loss": 1.7102, "step": 17680 }, { "epoch": 0.5148791343049132, "grad_norm": 10.75, "learning_rate": 8.283738628658298e-06, "loss": 1.5917, "step": 17700 }, { "epoch": 0.5154609186374611, "grad_norm": 13.375, "learning_rate": 8.281799346985683e-06, "loss": 1.6896, "step": 17720 }, { "epoch": 0.516042702970009, "grad_norm": 12.3125, "learning_rate": 8.279860065313068e-06, "loss": 1.6722, "step": 17740 }, { "epoch": 0.516624487302557, "grad_norm": 12.75, "learning_rate": 8.277920783640453e-06, "loss": 1.6643, "step": 17760 }, { "epoch": 0.5172062716351049, "grad_norm": 10.25, "learning_rate": 8.275981501967838e-06, "loss": 1.6292, "step": 17780 }, { "epoch": 0.5177880559676528, "grad_norm": 13.0, "learning_rate": 8.274042220295223e-06, "loss": 1.664, "step": 17800 }, { "epoch": 0.5183698403002007, "grad_norm": 12.25, "learning_rate": 8.272102938622608e-06, "loss": 1.558, "step": 17820 }, { "epoch": 0.5189516246327487, "grad_norm": 13.0625, "learning_rate": 8.270163656949994e-06, "loss": 1.6129, "step": 17840 }, { "epoch": 0.5195334089652965, "grad_norm": 14.3125, "learning_rate": 8.268224375277379e-06, "loss": 1.6074, "step": 17860 }, { "epoch": 0.5201151932978445, "grad_norm": 9.625, "learning_rate": 8.266285093604764e-06, "loss": 1.6244, "step": 17880 }, { "epoch": 0.5206969776303925, "grad_norm": 12.625, "learning_rate": 8.264345811932149e-06, "loss": 1.595, "step": 17900 }, { "epoch": 0.5212787619629403, "grad_norm": 11.0625, "learning_rate": 8.262406530259534e-06, "loss": 1.5696, "step": 17920 }, { "epoch": 0.5218605462954883, "grad_norm": 11.1875, "learning_rate": 8.260467248586919e-06, "loss": 1.6314, "step": 17940 }, { "epoch": 0.5224423306280361, "grad_norm": 11.75, "learning_rate": 8.258527966914304e-06, "loss": 1.6623, "step": 17960 }, { "epoch": 0.5230241149605841, "grad_norm": 10.5, "learning_rate": 8.25658868524169e-06, "loss": 1.6871, "step": 17980 }, { "epoch": 0.5236058992931321, "grad_norm": 12.375, "learning_rate": 8.254649403569074e-06, "loss": 1.6504, "step": 18000 }, { "epoch": 0.5241876836256799, "grad_norm": 12.1875, "learning_rate": 8.25271012189646e-06, "loss": 1.5599, "step": 18020 }, { "epoch": 0.5247694679582279, "grad_norm": 15.25, "learning_rate": 8.250770840223844e-06, "loss": 1.6048, "step": 18040 }, { "epoch": 0.5253512522907758, "grad_norm": 14.3125, "learning_rate": 8.24883155855123e-06, "loss": 1.6068, "step": 18060 }, { "epoch": 0.5259330366233237, "grad_norm": 11.625, "learning_rate": 8.246892276878615e-06, "loss": 1.6132, "step": 18080 }, { "epoch": 0.5265148209558717, "grad_norm": 12.875, "learning_rate": 8.244952995206e-06, "loss": 1.5648, "step": 18100 }, { "epoch": 0.5270966052884196, "grad_norm": 13.1875, "learning_rate": 8.243013713533385e-06, "loss": 1.6553, "step": 18120 }, { "epoch": 0.5276783896209675, "grad_norm": 11.5, "learning_rate": 8.24107443186077e-06, "loss": 1.5852, "step": 18140 }, { "epoch": 0.5282601739535154, "grad_norm": 13.625, "learning_rate": 8.239135150188155e-06, "loss": 1.6632, "step": 18160 }, { "epoch": 0.5288419582860634, "grad_norm": 14.5625, "learning_rate": 8.23719586851554e-06, "loss": 1.6971, "step": 18180 }, { "epoch": 0.5294237426186112, "grad_norm": 17.75, "learning_rate": 8.235256586842925e-06, "loss": 1.7631, "step": 18200 }, { "epoch": 0.5300055269511592, "grad_norm": 14.625, "learning_rate": 8.23331730517031e-06, "loss": 1.6325, "step": 18220 }, { "epoch": 0.5305873112837072, "grad_norm": 13.3125, "learning_rate": 8.231378023497695e-06, "loss": 1.6736, "step": 18240 }, { "epoch": 0.531169095616255, "grad_norm": 12.625, "learning_rate": 8.22943874182508e-06, "loss": 1.6393, "step": 18260 }, { "epoch": 0.531750879948803, "grad_norm": 14.875, "learning_rate": 8.227499460152466e-06, "loss": 1.6378, "step": 18280 }, { "epoch": 0.532332664281351, "grad_norm": 14.75, "learning_rate": 8.22556017847985e-06, "loss": 1.6205, "step": 18300 }, { "epoch": 0.5329144486138988, "grad_norm": 12.625, "learning_rate": 8.223620896807236e-06, "loss": 1.6166, "step": 18320 }, { "epoch": 0.5334962329464468, "grad_norm": 13.5625, "learning_rate": 8.221681615134621e-06, "loss": 1.6742, "step": 18340 }, { "epoch": 0.5340780172789946, "grad_norm": 13.25, "learning_rate": 8.219742333462006e-06, "loss": 1.6505, "step": 18360 }, { "epoch": 0.5346598016115426, "grad_norm": 12.375, "learning_rate": 8.21780305178939e-06, "loss": 1.6826, "step": 18380 }, { "epoch": 0.5352415859440905, "grad_norm": 12.125, "learning_rate": 8.215863770116775e-06, "loss": 1.657, "step": 18400 }, { "epoch": 0.5358233702766384, "grad_norm": 14.0625, "learning_rate": 8.21392448844416e-06, "loss": 1.5712, "step": 18420 }, { "epoch": 0.5364051546091864, "grad_norm": 11.6875, "learning_rate": 8.211985206771545e-06, "loss": 1.7179, "step": 18440 }, { "epoch": 0.5369869389417343, "grad_norm": 13.5625, "learning_rate": 8.21004592509893e-06, "loss": 1.6749, "step": 18460 }, { "epoch": 0.5375687232742822, "grad_norm": 13.0625, "learning_rate": 8.208106643426315e-06, "loss": 1.6932, "step": 18480 }, { "epoch": 0.5381505076068301, "grad_norm": 8.8125, "learning_rate": 8.2061673617537e-06, "loss": 1.6385, "step": 18500 }, { "epoch": 0.5387322919393781, "grad_norm": 12.125, "learning_rate": 8.204228080081085e-06, "loss": 1.683, "step": 18520 }, { "epoch": 0.539314076271926, "grad_norm": 13.125, "learning_rate": 8.20228879840847e-06, "loss": 1.59, "step": 18540 }, { "epoch": 0.5398958606044739, "grad_norm": 12.4375, "learning_rate": 8.200349516735855e-06, "loss": 1.5987, "step": 18560 }, { "epoch": 0.5404776449370219, "grad_norm": 12.625, "learning_rate": 8.19841023506324e-06, "loss": 1.63, "step": 18580 }, { "epoch": 0.5410594292695697, "grad_norm": 11.6875, "learning_rate": 8.196470953390626e-06, "loss": 1.698, "step": 18600 }, { "epoch": 0.5416412136021177, "grad_norm": 12.1875, "learning_rate": 8.19453167171801e-06, "loss": 1.6323, "step": 18620 }, { "epoch": 0.5422229979346657, "grad_norm": 11.5625, "learning_rate": 8.192592390045396e-06, "loss": 1.6039, "step": 18640 }, { "epoch": 0.5428047822672135, "grad_norm": 10.3125, "learning_rate": 8.19065310837278e-06, "loss": 1.6931, "step": 18660 }, { "epoch": 0.5433865665997615, "grad_norm": 10.875, "learning_rate": 8.188713826700166e-06, "loss": 1.6977, "step": 18680 }, { "epoch": 0.5439683509323094, "grad_norm": 9.8125, "learning_rate": 8.186774545027551e-06, "loss": 1.5836, "step": 18700 }, { "epoch": 0.5445501352648573, "grad_norm": 12.6875, "learning_rate": 8.184835263354936e-06, "loss": 1.6545, "step": 18720 }, { "epoch": 0.5451319195974053, "grad_norm": 10.0, "learning_rate": 8.182895981682321e-06, "loss": 1.5713, "step": 18740 }, { "epoch": 0.5457137039299532, "grad_norm": 11.25, "learning_rate": 8.180956700009706e-06, "loss": 1.5724, "step": 18760 }, { "epoch": 0.5462954882625011, "grad_norm": 11.5, "learning_rate": 8.179017418337091e-06, "loss": 1.7166, "step": 18780 }, { "epoch": 0.546877272595049, "grad_norm": 11.9375, "learning_rate": 8.177078136664477e-06, "loss": 1.735, "step": 18800 }, { "epoch": 0.5474590569275969, "grad_norm": 12.0625, "learning_rate": 8.175138854991862e-06, "loss": 1.6193, "step": 18820 }, { "epoch": 0.5480408412601449, "grad_norm": 10.875, "learning_rate": 8.173199573319247e-06, "loss": 1.6175, "step": 18840 }, { "epoch": 0.5486226255926928, "grad_norm": 12.0625, "learning_rate": 8.171260291646632e-06, "loss": 1.748, "step": 18860 }, { "epoch": 0.5492044099252407, "grad_norm": 15.9375, "learning_rate": 8.169321009974017e-06, "loss": 1.66, "step": 18880 }, { "epoch": 0.5497861942577886, "grad_norm": 10.6875, "learning_rate": 8.167381728301402e-06, "loss": 1.7462, "step": 18900 }, { "epoch": 0.5503679785903366, "grad_norm": 12.125, "learning_rate": 8.165442446628787e-06, "loss": 1.6313, "step": 18920 }, { "epoch": 0.5509497629228844, "grad_norm": 10.8125, "learning_rate": 8.163503164956172e-06, "loss": 1.6117, "step": 18940 }, { "epoch": 0.5515315472554324, "grad_norm": 10.875, "learning_rate": 8.161563883283557e-06, "loss": 1.56, "step": 18960 }, { "epoch": 0.5521133315879804, "grad_norm": 12.875, "learning_rate": 8.159624601610942e-06, "loss": 1.6744, "step": 18980 }, { "epoch": 0.5526951159205282, "grad_norm": 13.25, "learning_rate": 8.157685319938327e-06, "loss": 1.6503, "step": 19000 }, { "epoch": 0.5532769002530762, "grad_norm": 10.125, "learning_rate": 8.155746038265713e-06, "loss": 1.6612, "step": 19020 }, { "epoch": 0.5538586845856242, "grad_norm": 14.6875, "learning_rate": 8.153806756593098e-06, "loss": 1.6828, "step": 19040 }, { "epoch": 0.554440468918172, "grad_norm": 14.75, "learning_rate": 8.151867474920483e-06, "loss": 1.6411, "step": 19060 }, { "epoch": 0.55502225325072, "grad_norm": 10.75, "learning_rate": 8.149928193247868e-06, "loss": 1.614, "step": 19080 }, { "epoch": 0.5556040375832679, "grad_norm": 12.25, "learning_rate": 8.147988911575253e-06, "loss": 1.5938, "step": 19100 }, { "epoch": 0.5561858219158158, "grad_norm": 13.0, "learning_rate": 8.146049629902638e-06, "loss": 1.6811, "step": 19120 }, { "epoch": 0.5567676062483637, "grad_norm": 11.375, "learning_rate": 8.144110348230023e-06, "loss": 1.6226, "step": 19140 }, { "epoch": 0.5573493905809117, "grad_norm": 10.5625, "learning_rate": 8.142171066557408e-06, "loss": 1.6727, "step": 19160 }, { "epoch": 0.5579311749134596, "grad_norm": 15.3125, "learning_rate": 8.140231784884793e-06, "loss": 1.6248, "step": 19180 }, { "epoch": 0.5585129592460075, "grad_norm": 10.1875, "learning_rate": 8.138292503212178e-06, "loss": 1.6383, "step": 19200 }, { "epoch": 0.5590947435785554, "grad_norm": 10.4375, "learning_rate": 8.136353221539564e-06, "loss": 1.6951, "step": 19220 }, { "epoch": 0.5596765279111033, "grad_norm": 12.0625, "learning_rate": 8.134413939866949e-06, "loss": 1.722, "step": 19240 }, { "epoch": 0.5602583122436513, "grad_norm": 14.625, "learning_rate": 8.132474658194334e-06, "loss": 1.6129, "step": 19260 }, { "epoch": 0.5608400965761992, "grad_norm": 13.0, "learning_rate": 8.130535376521719e-06, "loss": 1.6803, "step": 19280 }, { "epoch": 0.5614218809087471, "grad_norm": 12.75, "learning_rate": 8.128596094849104e-06, "loss": 1.6305, "step": 19300 }, { "epoch": 0.5620036652412951, "grad_norm": 13.3125, "learning_rate": 8.126656813176489e-06, "loss": 1.5954, "step": 19320 }, { "epoch": 0.5625854495738429, "grad_norm": 11.375, "learning_rate": 8.124717531503874e-06, "loss": 1.5627, "step": 19340 }, { "epoch": 0.5631672339063909, "grad_norm": 12.0625, "learning_rate": 8.12277824983126e-06, "loss": 1.6935, "step": 19360 }, { "epoch": 0.5637490182389389, "grad_norm": 11.25, "learning_rate": 8.120838968158644e-06, "loss": 1.6396, "step": 19380 }, { "epoch": 0.5643308025714867, "grad_norm": 13.0625, "learning_rate": 8.11889968648603e-06, "loss": 1.594, "step": 19400 }, { "epoch": 0.5649125869040347, "grad_norm": 10.8125, "learning_rate": 8.116960404813415e-06, "loss": 1.6131, "step": 19420 }, { "epoch": 0.5654943712365826, "grad_norm": 12.125, "learning_rate": 8.1150211231408e-06, "loss": 1.6273, "step": 19440 }, { "epoch": 0.5660761555691305, "grad_norm": 15.8125, "learning_rate": 8.113081841468185e-06, "loss": 1.7437, "step": 19460 }, { "epoch": 0.5666579399016785, "grad_norm": 11.8125, "learning_rate": 8.11114255979557e-06, "loss": 1.7092, "step": 19480 }, { "epoch": 0.5672397242342264, "grad_norm": 16.75, "learning_rate": 8.109203278122955e-06, "loss": 1.6279, "step": 19500 }, { "epoch": 0.5678215085667743, "grad_norm": 13.75, "learning_rate": 8.10726399645034e-06, "loss": 1.7001, "step": 19520 }, { "epoch": 0.5684032928993222, "grad_norm": 13.9375, "learning_rate": 8.105324714777725e-06, "loss": 1.694, "step": 19540 }, { "epoch": 0.5689850772318702, "grad_norm": 14.6875, "learning_rate": 8.10338543310511e-06, "loss": 1.6262, "step": 19560 }, { "epoch": 0.569566861564418, "grad_norm": 12.75, "learning_rate": 8.101446151432495e-06, "loss": 1.6449, "step": 19580 }, { "epoch": 0.570148645896966, "grad_norm": 12.9375, "learning_rate": 8.09950686975988e-06, "loss": 1.6502, "step": 19600 }, { "epoch": 0.570730430229514, "grad_norm": 16.0, "learning_rate": 8.097567588087266e-06, "loss": 1.6979, "step": 19620 }, { "epoch": 0.5713122145620618, "grad_norm": 14.25, "learning_rate": 8.09562830641465e-06, "loss": 1.6853, "step": 19640 }, { "epoch": 0.5718939988946098, "grad_norm": 11.75, "learning_rate": 8.093689024742036e-06, "loss": 1.7246, "step": 19660 }, { "epoch": 0.5724757832271576, "grad_norm": 12.8125, "learning_rate": 8.09174974306942e-06, "loss": 1.6622, "step": 19680 }, { "epoch": 0.5730575675597056, "grad_norm": 13.25, "learning_rate": 8.089810461396806e-06, "loss": 1.6474, "step": 19700 }, { "epoch": 0.5736393518922536, "grad_norm": 13.125, "learning_rate": 8.087871179724191e-06, "loss": 1.6399, "step": 19720 }, { "epoch": 0.5742211362248014, "grad_norm": 8.6875, "learning_rate": 8.085931898051576e-06, "loss": 1.5597, "step": 19740 }, { "epoch": 0.5748029205573494, "grad_norm": 13.9375, "learning_rate": 8.083992616378961e-06, "loss": 1.6188, "step": 19760 }, { "epoch": 0.5753847048898973, "grad_norm": 11.3125, "learning_rate": 8.082053334706346e-06, "loss": 1.6458, "step": 19780 }, { "epoch": 0.5759664892224452, "grad_norm": 14.6875, "learning_rate": 8.080114053033731e-06, "loss": 1.6713, "step": 19800 }, { "epoch": 0.5765482735549932, "grad_norm": 12.9375, "learning_rate": 8.078174771361116e-06, "loss": 1.6076, "step": 19820 }, { "epoch": 0.5771300578875411, "grad_norm": 12.875, "learning_rate": 8.076235489688502e-06, "loss": 1.6367, "step": 19840 }, { "epoch": 0.577711842220089, "grad_norm": 13.375, "learning_rate": 8.074296208015887e-06, "loss": 1.6454, "step": 19860 }, { "epoch": 0.5782936265526369, "grad_norm": 14.625, "learning_rate": 8.072356926343272e-06, "loss": 1.7048, "step": 19880 }, { "epoch": 0.5788754108851849, "grad_norm": 13.9375, "learning_rate": 8.070417644670657e-06, "loss": 1.5943, "step": 19900 }, { "epoch": 0.5794571952177328, "grad_norm": 14.5625, "learning_rate": 8.068478362998042e-06, "loss": 1.6203, "step": 19920 }, { "epoch": 0.5800389795502807, "grad_norm": 14.4375, "learning_rate": 8.066539081325427e-06, "loss": 1.712, "step": 19940 }, { "epoch": 0.5806207638828287, "grad_norm": 13.4375, "learning_rate": 8.064599799652812e-06, "loss": 1.6642, "step": 19960 }, { "epoch": 0.5812025482153765, "grad_norm": 11.75, "learning_rate": 8.062660517980197e-06, "loss": 1.5492, "step": 19980 }, { "epoch": 0.5817843325479245, "grad_norm": 11.75, "learning_rate": 8.060721236307582e-06, "loss": 1.5491, "step": 20000 }, { "epoch": 0.5823661168804725, "grad_norm": 10.0, "learning_rate": 8.058781954634966e-06, "loss": 1.6416, "step": 20020 }, { "epoch": 0.5829479012130203, "grad_norm": 13.6875, "learning_rate": 8.056842672962351e-06, "loss": 1.6852, "step": 20040 }, { "epoch": 0.5835296855455683, "grad_norm": 17.0, "learning_rate": 8.054903391289736e-06, "loss": 1.648, "step": 20060 }, { "epoch": 0.5841114698781161, "grad_norm": 12.9375, "learning_rate": 8.052964109617121e-06, "loss": 1.7016, "step": 20080 }, { "epoch": 0.5846932542106641, "grad_norm": 13.5, "learning_rate": 8.051024827944506e-06, "loss": 1.6324, "step": 20100 }, { "epoch": 0.5852750385432121, "grad_norm": 13.0, "learning_rate": 8.049085546271891e-06, "loss": 1.649, "step": 20120 }, { "epoch": 0.5858568228757599, "grad_norm": 10.8125, "learning_rate": 8.047146264599276e-06, "loss": 1.6909, "step": 20140 }, { "epoch": 0.5864386072083079, "grad_norm": 14.0, "learning_rate": 8.045206982926661e-06, "loss": 1.6395, "step": 20160 }, { "epoch": 0.5870203915408558, "grad_norm": 12.0625, "learning_rate": 8.043267701254047e-06, "loss": 1.713, "step": 20180 }, { "epoch": 0.5876021758734037, "grad_norm": 12.75, "learning_rate": 8.041328419581432e-06, "loss": 1.606, "step": 20200 }, { "epoch": 0.5881839602059517, "grad_norm": 12.8125, "learning_rate": 8.039389137908817e-06, "loss": 1.6471, "step": 20220 }, { "epoch": 0.5887657445384996, "grad_norm": 14.75, "learning_rate": 8.037449856236202e-06, "loss": 1.7117, "step": 20240 }, { "epoch": 0.5893475288710475, "grad_norm": 12.4375, "learning_rate": 8.035510574563587e-06, "loss": 1.5581, "step": 20260 }, { "epoch": 0.5899293132035954, "grad_norm": 12.0, "learning_rate": 8.033571292890972e-06, "loss": 1.6135, "step": 20280 }, { "epoch": 0.5905110975361434, "grad_norm": 11.25, "learning_rate": 8.031632011218357e-06, "loss": 1.6054, "step": 20300 }, { "epoch": 0.5910928818686912, "grad_norm": 13.4375, "learning_rate": 8.029692729545742e-06, "loss": 1.771, "step": 20320 }, { "epoch": 0.5916746662012392, "grad_norm": 11.25, "learning_rate": 8.027753447873127e-06, "loss": 1.6584, "step": 20340 }, { "epoch": 0.5922564505337872, "grad_norm": 9.875, "learning_rate": 8.025814166200512e-06, "loss": 1.6999, "step": 20360 }, { "epoch": 0.592838234866335, "grad_norm": 11.4375, "learning_rate": 8.023874884527898e-06, "loss": 1.6357, "step": 20380 }, { "epoch": 0.593420019198883, "grad_norm": 11.8125, "learning_rate": 8.021935602855283e-06, "loss": 1.5729, "step": 20400 }, { "epoch": 0.594001803531431, "grad_norm": 13.5, "learning_rate": 8.019996321182668e-06, "loss": 1.6684, "step": 20420 }, { "epoch": 0.5945835878639788, "grad_norm": 11.875, "learning_rate": 8.018057039510053e-06, "loss": 1.6585, "step": 20440 }, { "epoch": 0.5951653721965268, "grad_norm": 12.625, "learning_rate": 8.016117757837438e-06, "loss": 1.6137, "step": 20460 }, { "epoch": 0.5957471565290746, "grad_norm": 10.75, "learning_rate": 8.014178476164823e-06, "loss": 1.647, "step": 20480 }, { "epoch": 0.5963289408616226, "grad_norm": 12.5625, "learning_rate": 8.012239194492208e-06, "loss": 1.6484, "step": 20500 }, { "epoch": 0.5969107251941705, "grad_norm": 14.0625, "learning_rate": 8.010299912819593e-06, "loss": 1.7112, "step": 20520 }, { "epoch": 0.5974925095267184, "grad_norm": 9.75, "learning_rate": 8.008360631146978e-06, "loss": 1.6521, "step": 20540 }, { "epoch": 0.5980742938592664, "grad_norm": 11.5, "learning_rate": 8.006421349474363e-06, "loss": 1.6941, "step": 20560 }, { "epoch": 0.5986560781918143, "grad_norm": 11.125, "learning_rate": 8.004482067801747e-06, "loss": 1.7052, "step": 20580 }, { "epoch": 0.5992378625243622, "grad_norm": 11.5, "learning_rate": 8.002542786129132e-06, "loss": 1.5777, "step": 20600 }, { "epoch": 0.5998196468569101, "grad_norm": 9.8125, "learning_rate": 8.000603504456517e-06, "loss": 1.7117, "step": 20620 }, { "epoch": 0.6004014311894581, "grad_norm": 13.1875, "learning_rate": 7.998664222783902e-06, "loss": 1.6213, "step": 20640 }, { "epoch": 0.600983215522006, "grad_norm": 13.375, "learning_rate": 7.996724941111287e-06, "loss": 1.6577, "step": 20660 }, { "epoch": 0.6015649998545539, "grad_norm": 13.4375, "learning_rate": 7.994785659438672e-06, "loss": 1.6816, "step": 20680 }, { "epoch": 0.6021467841871019, "grad_norm": 12.5, "learning_rate": 7.992846377766057e-06, "loss": 1.536, "step": 20700 }, { "epoch": 0.6027285685196497, "grad_norm": 11.5, "learning_rate": 7.990907096093442e-06, "loss": 1.5839, "step": 20720 }, { "epoch": 0.6033103528521977, "grad_norm": 10.625, "learning_rate": 7.988967814420828e-06, "loss": 1.7227, "step": 20740 }, { "epoch": 0.6038921371847457, "grad_norm": 14.0625, "learning_rate": 7.987028532748213e-06, "loss": 1.6775, "step": 20760 }, { "epoch": 0.6044739215172935, "grad_norm": 12.8125, "learning_rate": 7.985089251075598e-06, "loss": 1.6013, "step": 20780 }, { "epoch": 0.6050557058498415, "grad_norm": 14.3125, "learning_rate": 7.983149969402983e-06, "loss": 1.5694, "step": 20800 }, { "epoch": 0.6056374901823894, "grad_norm": 13.875, "learning_rate": 7.981210687730368e-06, "loss": 1.5985, "step": 20820 }, { "epoch": 0.6062192745149373, "grad_norm": 10.375, "learning_rate": 7.979271406057753e-06, "loss": 1.586, "step": 20840 }, { "epoch": 0.6068010588474853, "grad_norm": 11.25, "learning_rate": 7.977332124385138e-06, "loss": 1.5524, "step": 20860 }, { "epoch": 0.6073828431800332, "grad_norm": 12.4375, "learning_rate": 7.975392842712523e-06, "loss": 1.5534, "step": 20880 }, { "epoch": 0.6079646275125811, "grad_norm": 12.75, "learning_rate": 7.973453561039908e-06, "loss": 1.649, "step": 20900 }, { "epoch": 0.608546411845129, "grad_norm": 11.8125, "learning_rate": 7.971514279367293e-06, "loss": 1.6024, "step": 20920 }, { "epoch": 0.6091281961776769, "grad_norm": 15.875, "learning_rate": 7.969574997694679e-06, "loss": 1.6998, "step": 20940 }, { "epoch": 0.6097099805102248, "grad_norm": 15.0625, "learning_rate": 7.967635716022064e-06, "loss": 1.6045, "step": 20960 }, { "epoch": 0.6102917648427728, "grad_norm": 11.0625, "learning_rate": 7.965696434349449e-06, "loss": 1.6288, "step": 20980 }, { "epoch": 0.6108735491753207, "grad_norm": 13.5625, "learning_rate": 7.963757152676834e-06, "loss": 1.5685, "step": 21000 }, { "epoch": 0.6114553335078686, "grad_norm": 12.0625, "learning_rate": 7.961817871004219e-06, "loss": 1.7005, "step": 21020 }, { "epoch": 0.6120371178404166, "grad_norm": 16.125, "learning_rate": 7.959878589331604e-06, "loss": 1.58, "step": 21040 }, { "epoch": 0.6126189021729644, "grad_norm": 11.5, "learning_rate": 7.957939307658989e-06, "loss": 1.6479, "step": 21060 }, { "epoch": 0.6132006865055124, "grad_norm": 12.375, "learning_rate": 7.956000025986374e-06, "loss": 1.5816, "step": 21080 }, { "epoch": 0.6137824708380604, "grad_norm": 11.1875, "learning_rate": 7.95406074431376e-06, "loss": 1.6428, "step": 21100 }, { "epoch": 0.6143642551706082, "grad_norm": 11.25, "learning_rate": 7.952121462641144e-06, "loss": 1.6375, "step": 21120 }, { "epoch": 0.6149460395031562, "grad_norm": 12.4375, "learning_rate": 7.95018218096853e-06, "loss": 1.6391, "step": 21140 }, { "epoch": 0.6155278238357041, "grad_norm": 13.875, "learning_rate": 7.948242899295915e-06, "loss": 1.6711, "step": 21160 }, { "epoch": 0.616109608168252, "grad_norm": 14.875, "learning_rate": 7.9463036176233e-06, "loss": 1.6958, "step": 21180 }, { "epoch": 0.6166913925008, "grad_norm": 13.125, "learning_rate": 7.944364335950685e-06, "loss": 1.5907, "step": 21200 }, { "epoch": 0.6172731768333479, "grad_norm": 12.4375, "learning_rate": 7.94242505427807e-06, "loss": 1.6955, "step": 21220 }, { "epoch": 0.6178549611658958, "grad_norm": 12.1875, "learning_rate": 7.940485772605455e-06, "loss": 1.646, "step": 21240 }, { "epoch": 0.6184367454984437, "grad_norm": 13.1875, "learning_rate": 7.93854649093284e-06, "loss": 1.7521, "step": 21260 }, { "epoch": 0.6190185298309917, "grad_norm": 12.625, "learning_rate": 7.936607209260225e-06, "loss": 1.6426, "step": 21280 }, { "epoch": 0.6196003141635396, "grad_norm": 13.125, "learning_rate": 7.93466792758761e-06, "loss": 1.5326, "step": 21300 }, { "epoch": 0.6201820984960875, "grad_norm": 12.5, "learning_rate": 7.932728645914995e-06, "loss": 1.5717, "step": 21320 }, { "epoch": 0.6207638828286354, "grad_norm": 12.0, "learning_rate": 7.93078936424238e-06, "loss": 1.5635, "step": 21340 }, { "epoch": 0.6213456671611833, "grad_norm": 11.8125, "learning_rate": 7.928850082569766e-06, "loss": 1.7181, "step": 21360 }, { "epoch": 0.6219274514937313, "grad_norm": 14.3125, "learning_rate": 7.92691080089715e-06, "loss": 1.6385, "step": 21380 }, { "epoch": 0.6225092358262792, "grad_norm": 13.0, "learning_rate": 7.924971519224536e-06, "loss": 1.6784, "step": 21400 }, { "epoch": 0.6230910201588271, "grad_norm": 15.5, "learning_rate": 7.923032237551921e-06, "loss": 1.7064, "step": 21420 }, { "epoch": 0.6236728044913751, "grad_norm": 12.125, "learning_rate": 7.921092955879306e-06, "loss": 1.6372, "step": 21440 }, { "epoch": 0.6242545888239229, "grad_norm": 11.75, "learning_rate": 7.919153674206691e-06, "loss": 1.7176, "step": 21460 }, { "epoch": 0.6248363731564709, "grad_norm": 11.875, "learning_rate": 7.917214392534076e-06, "loss": 1.6417, "step": 21480 }, { "epoch": 0.6254181574890189, "grad_norm": 14.375, "learning_rate": 7.915275110861461e-06, "loss": 1.5704, "step": 21500 }, { "epoch": 0.6259999418215667, "grad_norm": 10.0625, "learning_rate": 7.913335829188846e-06, "loss": 1.5801, "step": 21520 }, { "epoch": 0.6265817261541147, "grad_norm": 11.5625, "learning_rate": 7.911396547516231e-06, "loss": 1.6788, "step": 21540 }, { "epoch": 0.6271635104866626, "grad_norm": 12.625, "learning_rate": 7.909457265843617e-06, "loss": 1.6491, "step": 21560 }, { "epoch": 0.6277452948192105, "grad_norm": 13.3125, "learning_rate": 7.907517984171002e-06, "loss": 1.6589, "step": 21580 }, { "epoch": 0.6283270791517584, "grad_norm": 11.9375, "learning_rate": 7.905578702498387e-06, "loss": 1.6245, "step": 21600 }, { "epoch": 0.6289088634843064, "grad_norm": 11.5, "learning_rate": 7.903639420825772e-06, "loss": 1.6734, "step": 21620 }, { "epoch": 0.6294906478168543, "grad_norm": 14.6875, "learning_rate": 7.901700139153157e-06, "loss": 1.6331, "step": 21640 }, { "epoch": 0.6300724321494022, "grad_norm": 9.5625, "learning_rate": 7.899760857480542e-06, "loss": 1.6249, "step": 21660 }, { "epoch": 0.6306542164819502, "grad_norm": 10.1875, "learning_rate": 7.897821575807927e-06, "loss": 1.6278, "step": 21680 }, { "epoch": 0.631236000814498, "grad_norm": 13.5625, "learning_rate": 7.895882294135312e-06, "loss": 1.689, "step": 21700 }, { "epoch": 0.631817785147046, "grad_norm": 11.9375, "learning_rate": 7.893943012462697e-06, "loss": 1.6893, "step": 21720 }, { "epoch": 0.6323995694795939, "grad_norm": 11.5625, "learning_rate": 7.892003730790082e-06, "loss": 1.6971, "step": 21740 }, { "epoch": 0.6329813538121418, "grad_norm": 11.6875, "learning_rate": 7.890064449117468e-06, "loss": 1.5925, "step": 21760 }, { "epoch": 0.6335631381446898, "grad_norm": 12.125, "learning_rate": 7.888125167444853e-06, "loss": 1.6094, "step": 21780 }, { "epoch": 0.6341449224772376, "grad_norm": 11.4375, "learning_rate": 7.886185885772238e-06, "loss": 1.6971, "step": 21800 }, { "epoch": 0.6347267068097856, "grad_norm": 12.8125, "learning_rate": 7.884246604099623e-06, "loss": 1.6558, "step": 21820 }, { "epoch": 0.6353084911423336, "grad_norm": 13.6875, "learning_rate": 7.882307322427008e-06, "loss": 1.6819, "step": 21840 }, { "epoch": 0.6358902754748814, "grad_norm": 13.5625, "learning_rate": 7.880368040754393e-06, "loss": 1.5455, "step": 21860 }, { "epoch": 0.6364720598074294, "grad_norm": 12.1875, "learning_rate": 7.878428759081778e-06, "loss": 1.6307, "step": 21880 }, { "epoch": 0.6370538441399773, "grad_norm": 11.5625, "learning_rate": 7.876489477409163e-06, "loss": 1.6101, "step": 21900 }, { "epoch": 0.6376356284725252, "grad_norm": 12.6875, "learning_rate": 7.874550195736548e-06, "loss": 1.5181, "step": 21920 }, { "epoch": 0.6382174128050732, "grad_norm": 12.4375, "learning_rate": 7.872610914063933e-06, "loss": 1.6125, "step": 21940 }, { "epoch": 0.6387991971376211, "grad_norm": 13.1875, "learning_rate": 7.870671632391319e-06, "loss": 1.6586, "step": 21960 }, { "epoch": 0.639380981470169, "grad_norm": 12.375, "learning_rate": 7.868732350718704e-06, "loss": 1.6246, "step": 21980 }, { "epoch": 0.6399627658027169, "grad_norm": 11.875, "learning_rate": 7.866793069046089e-06, "loss": 1.5835, "step": 22000 }, { "epoch": 0.6405445501352649, "grad_norm": 12.5, "learning_rate": 7.864853787373474e-06, "loss": 1.6767, "step": 22020 }, { "epoch": 0.6411263344678128, "grad_norm": 12.25, "learning_rate": 7.862914505700859e-06, "loss": 1.5982, "step": 22040 }, { "epoch": 0.6417081188003607, "grad_norm": 10.1875, "learning_rate": 7.860975224028244e-06, "loss": 1.6858, "step": 22060 }, { "epoch": 0.6422899031329087, "grad_norm": 13.75, "learning_rate": 7.859035942355629e-06, "loss": 1.6265, "step": 22080 }, { "epoch": 0.6428716874654565, "grad_norm": 13.5625, "learning_rate": 7.857096660683014e-06, "loss": 1.6599, "step": 22100 }, { "epoch": 0.6434534717980045, "grad_norm": 12.3125, "learning_rate": 7.8551573790104e-06, "loss": 1.6791, "step": 22120 }, { "epoch": 0.6440352561305525, "grad_norm": 13.625, "learning_rate": 7.853218097337784e-06, "loss": 1.6365, "step": 22140 }, { "epoch": 0.6446170404631003, "grad_norm": 13.8125, "learning_rate": 7.85127881566517e-06, "loss": 1.6622, "step": 22160 }, { "epoch": 0.6451988247956483, "grad_norm": 12.625, "learning_rate": 7.849339533992555e-06, "loss": 1.6112, "step": 22180 }, { "epoch": 0.6457806091281961, "grad_norm": 12.0625, "learning_rate": 7.84740025231994e-06, "loss": 1.7412, "step": 22200 }, { "epoch": 0.6463623934607441, "grad_norm": 12.5625, "learning_rate": 7.845460970647323e-06, "loss": 1.6402, "step": 22220 }, { "epoch": 0.646944177793292, "grad_norm": 13.5625, "learning_rate": 7.843521688974708e-06, "loss": 1.6437, "step": 22240 }, { "epoch": 0.6475259621258399, "grad_norm": 10.9375, "learning_rate": 7.841582407302093e-06, "loss": 1.5291, "step": 22260 }, { "epoch": 0.6481077464583879, "grad_norm": 13.125, "learning_rate": 7.839643125629478e-06, "loss": 1.6853, "step": 22280 }, { "epoch": 0.6486895307909358, "grad_norm": 11.375, "learning_rate": 7.837703843956863e-06, "loss": 1.6956, "step": 22300 }, { "epoch": 0.6492713151234837, "grad_norm": 15.875, "learning_rate": 7.835764562284249e-06, "loss": 1.5831, "step": 22320 }, { "epoch": 0.6498530994560316, "grad_norm": 13.1875, "learning_rate": 7.833825280611634e-06, "loss": 1.6551, "step": 22340 }, { "epoch": 0.6504348837885796, "grad_norm": 11.875, "learning_rate": 7.831885998939019e-06, "loss": 1.5802, "step": 22360 }, { "epoch": 0.6510166681211275, "grad_norm": 13.125, "learning_rate": 7.829946717266404e-06, "loss": 1.6445, "step": 22380 }, { "epoch": 0.6515984524536754, "grad_norm": 12.3125, "learning_rate": 7.828007435593789e-06, "loss": 1.624, "step": 22400 }, { "epoch": 0.6521802367862234, "grad_norm": 10.5, "learning_rate": 7.826068153921174e-06, "loss": 1.5483, "step": 22420 }, { "epoch": 0.6527620211187712, "grad_norm": 11.5, "learning_rate": 7.82412887224856e-06, "loss": 1.6606, "step": 22440 }, { "epoch": 0.6533438054513192, "grad_norm": 13.0625, "learning_rate": 7.822189590575944e-06, "loss": 1.6794, "step": 22460 }, { "epoch": 0.6539255897838672, "grad_norm": 12.5625, "learning_rate": 7.82025030890333e-06, "loss": 1.6568, "step": 22480 }, { "epoch": 0.654507374116415, "grad_norm": 12.0, "learning_rate": 7.818311027230714e-06, "loss": 1.6155, "step": 22500 }, { "epoch": 0.655089158448963, "grad_norm": 10.875, "learning_rate": 7.8163717455581e-06, "loss": 1.5812, "step": 22520 }, { "epoch": 0.655670942781511, "grad_norm": 10.125, "learning_rate": 7.814432463885485e-06, "loss": 1.7043, "step": 22540 }, { "epoch": 0.6562527271140588, "grad_norm": 10.8125, "learning_rate": 7.81249318221287e-06, "loss": 1.6885, "step": 22560 }, { "epoch": 0.6568345114466068, "grad_norm": 13.5625, "learning_rate": 7.810553900540255e-06, "loss": 1.6724, "step": 22580 }, { "epoch": 0.6574162957791546, "grad_norm": 14.3125, "learning_rate": 7.80861461886764e-06, "loss": 1.6988, "step": 22600 }, { "epoch": 0.6579980801117026, "grad_norm": 14.0625, "learning_rate": 7.806675337195025e-06, "loss": 1.5984, "step": 22620 }, { "epoch": 0.6585798644442505, "grad_norm": 13.6875, "learning_rate": 7.80473605552241e-06, "loss": 1.625, "step": 22640 }, { "epoch": 0.6591616487767984, "grad_norm": 11.5625, "learning_rate": 7.802796773849795e-06, "loss": 1.6485, "step": 22660 }, { "epoch": 0.6597434331093464, "grad_norm": 11.625, "learning_rate": 7.80085749217718e-06, "loss": 1.6211, "step": 22680 }, { "epoch": 0.6603252174418943, "grad_norm": 11.9375, "learning_rate": 7.798918210504565e-06, "loss": 1.6608, "step": 22700 }, { "epoch": 0.6609070017744422, "grad_norm": 12.0, "learning_rate": 7.79697892883195e-06, "loss": 1.7853, "step": 22720 }, { "epoch": 0.6614887861069901, "grad_norm": 12.9375, "learning_rate": 7.795039647159336e-06, "loss": 1.6535, "step": 22740 }, { "epoch": 0.6620705704395381, "grad_norm": 11.9375, "learning_rate": 7.79310036548672e-06, "loss": 1.6026, "step": 22760 }, { "epoch": 0.662652354772086, "grad_norm": 11.5, "learning_rate": 7.791161083814106e-06, "loss": 1.6255, "step": 22780 }, { "epoch": 0.6632341391046339, "grad_norm": 11.875, "learning_rate": 7.789221802141491e-06, "loss": 1.6588, "step": 22800 }, { "epoch": 0.6638159234371819, "grad_norm": 11.875, "learning_rate": 7.787282520468876e-06, "loss": 1.5569, "step": 22820 }, { "epoch": 0.6643977077697297, "grad_norm": 12.125, "learning_rate": 7.785343238796261e-06, "loss": 1.6181, "step": 22840 }, { "epoch": 0.6649794921022777, "grad_norm": 15.0, "learning_rate": 7.783403957123646e-06, "loss": 1.5772, "step": 22860 }, { "epoch": 0.6655612764348257, "grad_norm": 11.5, "learning_rate": 7.781464675451031e-06, "loss": 1.5771, "step": 22880 }, { "epoch": 0.6661430607673735, "grad_norm": 13.8125, "learning_rate": 7.779525393778416e-06, "loss": 1.704, "step": 22900 }, { "epoch": 0.6667248450999215, "grad_norm": 13.125, "learning_rate": 7.777586112105802e-06, "loss": 1.6354, "step": 22920 }, { "epoch": 0.6673066294324694, "grad_norm": 12.0625, "learning_rate": 7.775646830433187e-06, "loss": 1.6523, "step": 22940 }, { "epoch": 0.6678884137650173, "grad_norm": 12.5625, "learning_rate": 7.773707548760572e-06, "loss": 1.6739, "step": 22960 }, { "epoch": 0.6684701980975652, "grad_norm": 11.0, "learning_rate": 7.771768267087957e-06, "loss": 1.5769, "step": 22980 }, { "epoch": 0.6690519824301132, "grad_norm": 10.8125, "learning_rate": 7.769828985415342e-06, "loss": 1.6732, "step": 23000 }, { "epoch": 0.6696337667626611, "grad_norm": 12.9375, "learning_rate": 7.767889703742727e-06, "loss": 1.601, "step": 23020 }, { "epoch": 0.670215551095209, "grad_norm": 19.125, "learning_rate": 7.765950422070112e-06, "loss": 1.7081, "step": 23040 }, { "epoch": 0.6707973354277569, "grad_norm": 10.0625, "learning_rate": 7.764011140397497e-06, "loss": 1.5929, "step": 23060 }, { "epoch": 0.6713791197603048, "grad_norm": 11.6875, "learning_rate": 7.762071858724882e-06, "loss": 1.6029, "step": 23080 }, { "epoch": 0.6719609040928528, "grad_norm": 11.9375, "learning_rate": 7.760132577052267e-06, "loss": 1.5392, "step": 23100 }, { "epoch": 0.6725426884254007, "grad_norm": 12.125, "learning_rate": 7.758193295379652e-06, "loss": 1.5838, "step": 23120 }, { "epoch": 0.6731244727579486, "grad_norm": 12.125, "learning_rate": 7.756254013707038e-06, "loss": 1.6709, "step": 23140 }, { "epoch": 0.6737062570904966, "grad_norm": 11.0625, "learning_rate": 7.754314732034423e-06, "loss": 1.6044, "step": 23160 }, { "epoch": 0.6742880414230444, "grad_norm": 14.4375, "learning_rate": 7.752375450361808e-06, "loss": 1.6394, "step": 23180 }, { "epoch": 0.6748698257555924, "grad_norm": 12.4375, "learning_rate": 7.750436168689193e-06, "loss": 1.6326, "step": 23200 }, { "epoch": 0.6754516100881404, "grad_norm": 14.1875, "learning_rate": 7.748496887016578e-06, "loss": 1.6891, "step": 23220 }, { "epoch": 0.6760333944206882, "grad_norm": 12.25, "learning_rate": 7.746557605343963e-06, "loss": 1.6324, "step": 23240 }, { "epoch": 0.6766151787532362, "grad_norm": 13.25, "learning_rate": 7.744618323671348e-06, "loss": 1.6257, "step": 23260 }, { "epoch": 0.6771969630857841, "grad_norm": 16.0, "learning_rate": 7.742679041998733e-06, "loss": 1.6609, "step": 23280 }, { "epoch": 0.677778747418332, "grad_norm": 12.875, "learning_rate": 7.740739760326118e-06, "loss": 1.6568, "step": 23300 }, { "epoch": 0.67836053175088, "grad_norm": 12.25, "learning_rate": 7.738800478653503e-06, "loss": 1.6479, "step": 23320 }, { "epoch": 0.6789423160834279, "grad_norm": 13.125, "learning_rate": 7.736861196980889e-06, "loss": 1.6478, "step": 23340 }, { "epoch": 0.6795241004159758, "grad_norm": 14.375, "learning_rate": 7.734921915308274e-06, "loss": 1.6291, "step": 23360 }, { "epoch": 0.6801058847485237, "grad_norm": 12.3125, "learning_rate": 7.732982633635659e-06, "loss": 1.5804, "step": 23380 }, { "epoch": 0.6806876690810717, "grad_norm": 9.875, "learning_rate": 7.731043351963044e-06, "loss": 1.6415, "step": 23400 }, { "epoch": 0.6812694534136196, "grad_norm": 14.8125, "learning_rate": 7.729104070290429e-06, "loss": 1.5866, "step": 23420 }, { "epoch": 0.6818512377461675, "grad_norm": 13.25, "learning_rate": 7.727164788617814e-06, "loss": 1.583, "step": 23440 }, { "epoch": 0.6824330220787154, "grad_norm": 13.6875, "learning_rate": 7.725225506945199e-06, "loss": 1.6845, "step": 23460 }, { "epoch": 0.6830148064112633, "grad_norm": 11.75, "learning_rate": 7.723286225272584e-06, "loss": 1.7366, "step": 23480 }, { "epoch": 0.6835965907438113, "grad_norm": 8.6875, "learning_rate": 7.72134694359997e-06, "loss": 1.6201, "step": 23500 }, { "epoch": 0.6841783750763591, "grad_norm": 13.3125, "learning_rate": 7.719407661927354e-06, "loss": 1.613, "step": 23520 }, { "epoch": 0.6847601594089071, "grad_norm": 12.375, "learning_rate": 7.71746838025474e-06, "loss": 1.6545, "step": 23540 }, { "epoch": 0.6853419437414551, "grad_norm": 10.875, "learning_rate": 7.715529098582125e-06, "loss": 1.5382, "step": 23560 }, { "epoch": 0.6859237280740029, "grad_norm": 14.25, "learning_rate": 7.71358981690951e-06, "loss": 1.6518, "step": 23580 }, { "epoch": 0.6865055124065509, "grad_norm": 12.1875, "learning_rate": 7.711650535236895e-06, "loss": 1.6836, "step": 23600 }, { "epoch": 0.6870872967390989, "grad_norm": 9.25, "learning_rate": 7.70971125356428e-06, "loss": 1.6406, "step": 23620 }, { "epoch": 0.6876690810716467, "grad_norm": 11.6875, "learning_rate": 7.707771971891665e-06, "loss": 1.6658, "step": 23640 }, { "epoch": 0.6882508654041947, "grad_norm": 14.6875, "learning_rate": 7.70583269021905e-06, "loss": 1.67, "step": 23660 }, { "epoch": 0.6888326497367426, "grad_norm": 12.25, "learning_rate": 7.703893408546435e-06, "loss": 1.5416, "step": 23680 }, { "epoch": 0.6894144340692905, "grad_norm": 13.3125, "learning_rate": 7.70195412687382e-06, "loss": 1.6719, "step": 23700 }, { "epoch": 0.6899962184018384, "grad_norm": 13.0625, "learning_rate": 7.700014845201205e-06, "loss": 1.6852, "step": 23720 }, { "epoch": 0.6905780027343864, "grad_norm": 15.0, "learning_rate": 7.69807556352859e-06, "loss": 1.5557, "step": 23740 }, { "epoch": 0.6911597870669343, "grad_norm": 13.9375, "learning_rate": 7.696136281855976e-06, "loss": 1.6628, "step": 23760 }, { "epoch": 0.6917415713994822, "grad_norm": 11.125, "learning_rate": 7.69419700018336e-06, "loss": 1.6792, "step": 23780 }, { "epoch": 0.6923233557320302, "grad_norm": 12.75, "learning_rate": 7.692257718510746e-06, "loss": 1.6234, "step": 23800 }, { "epoch": 0.692905140064578, "grad_norm": 16.5, "learning_rate": 7.690318436838131e-06, "loss": 1.5968, "step": 23820 }, { "epoch": 0.693486924397126, "grad_norm": 13.5, "learning_rate": 7.688379155165514e-06, "loss": 1.658, "step": 23840 }, { "epoch": 0.6940687087296739, "grad_norm": 11.1875, "learning_rate": 7.6864398734929e-06, "loss": 1.6739, "step": 23860 }, { "epoch": 0.6946504930622218, "grad_norm": 15.125, "learning_rate": 7.684500591820284e-06, "loss": 1.6477, "step": 23880 }, { "epoch": 0.6952322773947698, "grad_norm": 12.3125, "learning_rate": 7.68256131014767e-06, "loss": 1.5933, "step": 23900 }, { "epoch": 0.6958140617273176, "grad_norm": 12.625, "learning_rate": 7.680622028475055e-06, "loss": 1.642, "step": 23920 }, { "epoch": 0.6963958460598656, "grad_norm": 13.375, "learning_rate": 7.67868274680244e-06, "loss": 1.6607, "step": 23940 }, { "epoch": 0.6969776303924136, "grad_norm": 15.625, "learning_rate": 7.676743465129825e-06, "loss": 1.6606, "step": 23960 }, { "epoch": 0.6975594147249614, "grad_norm": 13.625, "learning_rate": 7.67480418345721e-06, "loss": 1.6539, "step": 23980 }, { "epoch": 0.6981411990575094, "grad_norm": 14.0, "learning_rate": 7.672864901784595e-06, "loss": 1.5874, "step": 24000 }, { "epoch": 0.6987229833900573, "grad_norm": 9.625, "learning_rate": 7.67092562011198e-06, "loss": 1.5782, "step": 24020 }, { "epoch": 0.6993047677226052, "grad_norm": 15.625, "learning_rate": 7.668986338439365e-06, "loss": 1.6254, "step": 24040 }, { "epoch": 0.6998865520551532, "grad_norm": 12.25, "learning_rate": 7.66704705676675e-06, "loss": 1.671, "step": 24060 }, { "epoch": 0.7004683363877011, "grad_norm": 10.3125, "learning_rate": 7.665107775094135e-06, "loss": 1.6397, "step": 24080 }, { "epoch": 0.701050120720249, "grad_norm": 11.0, "learning_rate": 7.66316849342152e-06, "loss": 1.741, "step": 24100 }, { "epoch": 0.7016319050527969, "grad_norm": 12.0, "learning_rate": 7.661229211748906e-06, "loss": 1.5878, "step": 24120 }, { "epoch": 0.7022136893853449, "grad_norm": 8.375, "learning_rate": 7.65928993007629e-06, "loss": 1.6567, "step": 24140 }, { "epoch": 0.7027954737178927, "grad_norm": 11.8125, "learning_rate": 7.657350648403676e-06, "loss": 1.56, "step": 24160 }, { "epoch": 0.7033772580504407, "grad_norm": 12.4375, "learning_rate": 7.655411366731061e-06, "loss": 1.6866, "step": 24180 }, { "epoch": 0.7039590423829887, "grad_norm": 14.0625, "learning_rate": 7.653472085058446e-06, "loss": 1.7478, "step": 24200 }, { "epoch": 0.7045408267155365, "grad_norm": 9.9375, "learning_rate": 7.651532803385831e-06, "loss": 1.6249, "step": 24220 }, { "epoch": 0.7051226110480845, "grad_norm": 12.25, "learning_rate": 7.649593521713216e-06, "loss": 1.6672, "step": 24240 }, { "epoch": 0.7057043953806325, "grad_norm": 9.8125, "learning_rate": 7.647654240040601e-06, "loss": 1.6609, "step": 24260 }, { "epoch": 0.7062861797131803, "grad_norm": 12.6875, "learning_rate": 7.645714958367986e-06, "loss": 1.6397, "step": 24280 }, { "epoch": 0.7068679640457283, "grad_norm": 10.75, "learning_rate": 7.643775676695372e-06, "loss": 1.5696, "step": 24300 }, { "epoch": 0.7074497483782761, "grad_norm": 7.09375, "learning_rate": 7.641836395022757e-06, "loss": 1.6747, "step": 24320 }, { "epoch": 0.7080315327108241, "grad_norm": 12.75, "learning_rate": 7.639897113350142e-06, "loss": 1.5875, "step": 24340 }, { "epoch": 0.708613317043372, "grad_norm": 13.125, "learning_rate": 7.637957831677527e-06, "loss": 1.6379, "step": 24360 }, { "epoch": 0.7091951013759199, "grad_norm": 10.125, "learning_rate": 7.636018550004912e-06, "loss": 1.6031, "step": 24380 }, { "epoch": 0.7097768857084679, "grad_norm": 10.375, "learning_rate": 7.634079268332297e-06, "loss": 1.6378, "step": 24400 }, { "epoch": 0.7103586700410158, "grad_norm": 12.8125, "learning_rate": 7.632139986659682e-06, "loss": 1.703, "step": 24420 }, { "epoch": 0.7109404543735637, "grad_norm": 13.625, "learning_rate": 7.630200704987067e-06, "loss": 1.5996, "step": 24440 }, { "epoch": 0.7115222387061116, "grad_norm": 10.3125, "learning_rate": 7.628261423314452e-06, "loss": 1.6742, "step": 24460 }, { "epoch": 0.7121040230386596, "grad_norm": 11.3125, "learning_rate": 7.626322141641837e-06, "loss": 1.6109, "step": 24480 }, { "epoch": 0.7126858073712075, "grad_norm": 14.125, "learning_rate": 7.6243828599692225e-06, "loss": 1.6273, "step": 24500 }, { "epoch": 0.7132675917037554, "grad_norm": 11.875, "learning_rate": 7.622443578296608e-06, "loss": 1.5985, "step": 24520 }, { "epoch": 0.7138493760363034, "grad_norm": 12.5, "learning_rate": 7.620504296623993e-06, "loss": 1.5652, "step": 24540 }, { "epoch": 0.7144311603688512, "grad_norm": 13.3125, "learning_rate": 7.618565014951378e-06, "loss": 1.604, "step": 24560 }, { "epoch": 0.7150129447013992, "grad_norm": 13.125, "learning_rate": 7.616625733278763e-06, "loss": 1.5017, "step": 24580 }, { "epoch": 0.7155947290339472, "grad_norm": 14.0, "learning_rate": 7.614686451606148e-06, "loss": 1.7284, "step": 24600 }, { "epoch": 0.716176513366495, "grad_norm": 11.3125, "learning_rate": 7.612747169933533e-06, "loss": 1.6873, "step": 24620 }, { "epoch": 0.716758297699043, "grad_norm": 14.1875, "learning_rate": 7.610807888260918e-06, "loss": 1.7274, "step": 24640 }, { "epoch": 0.7173400820315909, "grad_norm": 12.875, "learning_rate": 7.608868606588303e-06, "loss": 1.5654, "step": 24660 }, { "epoch": 0.7179218663641388, "grad_norm": 10.8125, "learning_rate": 7.606929324915688e-06, "loss": 1.5104, "step": 24680 }, { "epoch": 0.7185036506966868, "grad_norm": 12.3125, "learning_rate": 7.6049900432430735e-06, "loss": 1.6304, "step": 24700 }, { "epoch": 0.7190854350292346, "grad_norm": 13.625, "learning_rate": 7.603050761570459e-06, "loss": 1.5513, "step": 24720 }, { "epoch": 0.7196672193617826, "grad_norm": 11.25, "learning_rate": 7.601111479897844e-06, "loss": 1.6296, "step": 24740 }, { "epoch": 0.7202490036943305, "grad_norm": 11.375, "learning_rate": 7.599172198225229e-06, "loss": 1.6554, "step": 24760 }, { "epoch": 0.7208307880268784, "grad_norm": 13.4375, "learning_rate": 7.597232916552614e-06, "loss": 1.689, "step": 24780 }, { "epoch": 0.7214125723594264, "grad_norm": 13.0625, "learning_rate": 7.595293634879999e-06, "loss": 1.6671, "step": 24800 }, { "epoch": 0.7219943566919743, "grad_norm": 13.25, "learning_rate": 7.593354353207382e-06, "loss": 1.6171, "step": 24820 }, { "epoch": 0.7225761410245222, "grad_norm": 13.375, "learning_rate": 7.5914150715347675e-06, "loss": 1.6282, "step": 24840 }, { "epoch": 0.7231579253570701, "grad_norm": 12.1875, "learning_rate": 7.5894757898621526e-06, "loss": 1.6428, "step": 24860 }, { "epoch": 0.7237397096896181, "grad_norm": 12.5, "learning_rate": 7.587536508189538e-06, "loss": 1.7236, "step": 24880 }, { "epoch": 0.724321494022166, "grad_norm": 11.625, "learning_rate": 7.585597226516923e-06, "loss": 1.7371, "step": 24900 }, { "epoch": 0.7249032783547139, "grad_norm": 11.5625, "learning_rate": 7.583657944844308e-06, "loss": 1.6998, "step": 24920 }, { "epoch": 0.7254850626872619, "grad_norm": 13.1875, "learning_rate": 7.581718663171693e-06, "loss": 1.5971, "step": 24940 }, { "epoch": 0.7260668470198097, "grad_norm": 17.25, "learning_rate": 7.579779381499078e-06, "loss": 1.7575, "step": 24960 }, { "epoch": 0.7266486313523577, "grad_norm": 13.25, "learning_rate": 7.577840099826463e-06, "loss": 1.6709, "step": 24980 }, { "epoch": 0.7272304156849057, "grad_norm": 14.4375, "learning_rate": 7.575900818153848e-06, "loss": 1.5984, "step": 25000 }, { "epoch": 0.7278122000174535, "grad_norm": 15.0, "learning_rate": 7.573961536481233e-06, "loss": 1.7063, "step": 25020 }, { "epoch": 0.7283939843500015, "grad_norm": 11.125, "learning_rate": 7.5720222548086184e-06, "loss": 1.5983, "step": 25040 }, { "epoch": 0.7289757686825494, "grad_norm": 13.375, "learning_rate": 7.5700829731360035e-06, "loss": 1.516, "step": 25060 }, { "epoch": 0.7295575530150973, "grad_norm": 12.0, "learning_rate": 7.568143691463389e-06, "loss": 1.6036, "step": 25080 }, { "epoch": 0.7301393373476452, "grad_norm": 15.9375, "learning_rate": 7.566204409790774e-06, "loss": 1.6088, "step": 25100 }, { "epoch": 0.7307211216801931, "grad_norm": 12.0, "learning_rate": 7.564265128118159e-06, "loss": 1.7613, "step": 25120 }, { "epoch": 0.7313029060127411, "grad_norm": 12.125, "learning_rate": 7.562325846445544e-06, "loss": 1.638, "step": 25140 }, { "epoch": 0.731884690345289, "grad_norm": 13.375, "learning_rate": 7.560386564772929e-06, "loss": 1.644, "step": 25160 }, { "epoch": 0.7324664746778369, "grad_norm": 12.1875, "learning_rate": 7.558447283100314e-06, "loss": 1.6405, "step": 25180 }, { "epoch": 0.7330482590103848, "grad_norm": 10.625, "learning_rate": 7.556508001427699e-06, "loss": 1.6012, "step": 25200 }, { "epoch": 0.7336300433429328, "grad_norm": 10.9375, "learning_rate": 7.554568719755084e-06, "loss": 1.6877, "step": 25220 }, { "epoch": 0.7342118276754807, "grad_norm": 12.0, "learning_rate": 7.5526294380824694e-06, "loss": 1.6231, "step": 25240 }, { "epoch": 0.7347936120080286, "grad_norm": 13.6875, "learning_rate": 7.5506901564098545e-06, "loss": 1.6582, "step": 25260 }, { "epoch": 0.7353753963405766, "grad_norm": 14.1875, "learning_rate": 7.54875087473724e-06, "loss": 1.7438, "step": 25280 }, { "epoch": 0.7359571806731244, "grad_norm": 12.25, "learning_rate": 7.546811593064625e-06, "loss": 1.6458, "step": 25300 }, { "epoch": 0.7365389650056724, "grad_norm": 15.5, "learning_rate": 7.54487231139201e-06, "loss": 1.5911, "step": 25320 }, { "epoch": 0.7371207493382204, "grad_norm": 8.25, "learning_rate": 7.542933029719395e-06, "loss": 1.5868, "step": 25340 }, { "epoch": 0.7377025336707682, "grad_norm": 15.8125, "learning_rate": 7.54099374804678e-06, "loss": 1.583, "step": 25360 }, { "epoch": 0.7382843180033162, "grad_norm": 10.3125, "learning_rate": 7.539054466374165e-06, "loss": 1.6235, "step": 25380 }, { "epoch": 0.7388661023358641, "grad_norm": 11.8125, "learning_rate": 7.53711518470155e-06, "loss": 1.5862, "step": 25400 }, { "epoch": 0.739447886668412, "grad_norm": 13.125, "learning_rate": 7.535175903028935e-06, "loss": 1.6101, "step": 25420 }, { "epoch": 0.74002967100096, "grad_norm": 12.875, "learning_rate": 7.53323662135632e-06, "loss": 1.6228, "step": 25440 }, { "epoch": 0.7406114553335079, "grad_norm": 11.3125, "learning_rate": 7.5312973396837055e-06, "loss": 1.5888, "step": 25460 }, { "epoch": 0.7411932396660558, "grad_norm": 14.0625, "learning_rate": 7.529358058011091e-06, "loss": 1.6804, "step": 25480 }, { "epoch": 0.7417750239986037, "grad_norm": 13.375, "learning_rate": 7.527418776338476e-06, "loss": 1.6769, "step": 25500 }, { "epoch": 0.7423568083311517, "grad_norm": 12.4375, "learning_rate": 7.525479494665861e-06, "loss": 1.5462, "step": 25520 }, { "epoch": 0.7429385926636995, "grad_norm": 14.1875, "learning_rate": 7.523540212993246e-06, "loss": 1.7738, "step": 25540 }, { "epoch": 0.7435203769962475, "grad_norm": 8.8125, "learning_rate": 7.521600931320631e-06, "loss": 1.6262, "step": 25560 }, { "epoch": 0.7441021613287954, "grad_norm": 14.0, "learning_rate": 7.519661649648016e-06, "loss": 1.5742, "step": 25580 }, { "epoch": 0.7446839456613433, "grad_norm": 11.0625, "learning_rate": 7.517722367975401e-06, "loss": 1.7015, "step": 25600 }, { "epoch": 0.7452657299938913, "grad_norm": 12.125, "learning_rate": 7.515783086302786e-06, "loss": 1.7003, "step": 25620 }, { "epoch": 0.7458475143264391, "grad_norm": 13.25, "learning_rate": 7.5138438046301705e-06, "loss": 1.6813, "step": 25640 }, { "epoch": 0.7464292986589871, "grad_norm": 12.25, "learning_rate": 7.511904522957556e-06, "loss": 1.6657, "step": 25660 }, { "epoch": 0.7470110829915351, "grad_norm": 13.625, "learning_rate": 7.509965241284941e-06, "loss": 1.6187, "step": 25680 }, { "epoch": 0.7475928673240829, "grad_norm": 11.375, "learning_rate": 7.508025959612326e-06, "loss": 1.6316, "step": 25700 }, { "epoch": 0.7481746516566309, "grad_norm": 12.0625, "learning_rate": 7.506086677939711e-06, "loss": 1.6577, "step": 25720 }, { "epoch": 0.7487564359891788, "grad_norm": 12.5625, "learning_rate": 7.504147396267096e-06, "loss": 1.6205, "step": 25740 }, { "epoch": 0.7493382203217267, "grad_norm": 15.125, "learning_rate": 7.502208114594481e-06, "loss": 1.6341, "step": 25760 }, { "epoch": 0.7499200046542747, "grad_norm": 12.9375, "learning_rate": 7.500268832921866e-06, "loss": 1.6506, "step": 25780 }, { "epoch": 0.7505017889868226, "grad_norm": 13.5625, "learning_rate": 7.498329551249251e-06, "loss": 1.7797, "step": 25800 }, { "epoch": 0.7510835733193705, "grad_norm": 14.125, "learning_rate": 7.496390269576636e-06, "loss": 1.6324, "step": 25820 }, { "epoch": 0.7516653576519184, "grad_norm": 11.5625, "learning_rate": 7.4944509879040215e-06, "loss": 1.5381, "step": 25840 }, { "epoch": 0.7522471419844664, "grad_norm": 12.0, "learning_rate": 7.492511706231407e-06, "loss": 1.6465, "step": 25860 }, { "epoch": 0.7528289263170143, "grad_norm": 12.3125, "learning_rate": 7.490572424558792e-06, "loss": 1.6397, "step": 25880 }, { "epoch": 0.7534107106495622, "grad_norm": 12.5, "learning_rate": 7.488633142886177e-06, "loss": 1.6028, "step": 25900 }, { "epoch": 0.7539924949821102, "grad_norm": 12.125, "learning_rate": 7.486693861213562e-06, "loss": 1.6078, "step": 25920 }, { "epoch": 0.754574279314658, "grad_norm": 12.75, "learning_rate": 7.484754579540947e-06, "loss": 1.5693, "step": 25940 }, { "epoch": 0.755156063647206, "grad_norm": 11.5625, "learning_rate": 7.482815297868332e-06, "loss": 1.656, "step": 25960 }, { "epoch": 0.7557378479797539, "grad_norm": 10.5, "learning_rate": 7.480876016195717e-06, "loss": 1.6052, "step": 25980 }, { "epoch": 0.7563196323123018, "grad_norm": 14.5625, "learning_rate": 7.478936734523102e-06, "loss": 1.5874, "step": 26000 }, { "epoch": 0.7569014166448498, "grad_norm": 15.25, "learning_rate": 7.476997452850487e-06, "loss": 1.6457, "step": 26020 }, { "epoch": 0.7574832009773976, "grad_norm": 11.0625, "learning_rate": 7.4750581711778725e-06, "loss": 1.5725, "step": 26040 }, { "epoch": 0.7580649853099456, "grad_norm": 12.0, "learning_rate": 7.4731188895052576e-06, "loss": 1.6549, "step": 26060 }, { "epoch": 0.7586467696424936, "grad_norm": 12.3125, "learning_rate": 7.471179607832643e-06, "loss": 1.6379, "step": 26080 }, { "epoch": 0.7592285539750414, "grad_norm": 11.625, "learning_rate": 7.469240326160028e-06, "loss": 1.5308, "step": 26100 }, { "epoch": 0.7598103383075894, "grad_norm": 11.25, "learning_rate": 7.467301044487413e-06, "loss": 1.6131, "step": 26120 }, { "epoch": 0.7603921226401373, "grad_norm": 16.875, "learning_rate": 7.465361762814798e-06, "loss": 1.5696, "step": 26140 }, { "epoch": 0.7609739069726852, "grad_norm": 10.625, "learning_rate": 7.463422481142183e-06, "loss": 1.635, "step": 26160 }, { "epoch": 0.7615556913052332, "grad_norm": 10.8125, "learning_rate": 7.461483199469568e-06, "loss": 1.5774, "step": 26180 }, { "epoch": 0.7621374756377811, "grad_norm": 12.8125, "learning_rate": 7.459543917796953e-06, "loss": 1.5712, "step": 26200 }, { "epoch": 0.762719259970329, "grad_norm": 13.375, "learning_rate": 7.457604636124338e-06, "loss": 1.5626, "step": 26220 }, { "epoch": 0.7633010443028769, "grad_norm": 12.3125, "learning_rate": 7.4556653544517235e-06, "loss": 1.6614, "step": 26240 }, { "epoch": 0.7638828286354249, "grad_norm": 13.0, "learning_rate": 7.4537260727791085e-06, "loss": 1.5953, "step": 26260 }, { "epoch": 0.7644646129679727, "grad_norm": 12.125, "learning_rate": 7.451786791106494e-06, "loss": 1.6212, "step": 26280 }, { "epoch": 0.7650463973005207, "grad_norm": 14.8125, "learning_rate": 7.449847509433879e-06, "loss": 1.5182, "step": 26300 }, { "epoch": 0.7656281816330687, "grad_norm": 11.6875, "learning_rate": 7.447908227761264e-06, "loss": 1.6819, "step": 26320 }, { "epoch": 0.7662099659656165, "grad_norm": 12.6875, "learning_rate": 7.445968946088649e-06, "loss": 1.5642, "step": 26340 }, { "epoch": 0.7667917502981645, "grad_norm": 15.0, "learning_rate": 7.444029664416034e-06, "loss": 1.6226, "step": 26360 }, { "epoch": 0.7673735346307123, "grad_norm": 14.1875, "learning_rate": 7.442090382743419e-06, "loss": 1.611, "step": 26380 }, { "epoch": 0.7679553189632603, "grad_norm": 13.6875, "learning_rate": 7.440151101070804e-06, "loss": 1.6904, "step": 26400 }, { "epoch": 0.7685371032958083, "grad_norm": 12.625, "learning_rate": 7.438211819398189e-06, "loss": 1.6579, "step": 26420 }, { "epoch": 0.7691188876283561, "grad_norm": 11.125, "learning_rate": 7.4362725377255744e-06, "loss": 1.584, "step": 26440 }, { "epoch": 0.7697006719609041, "grad_norm": 13.8125, "learning_rate": 7.434333256052959e-06, "loss": 1.6818, "step": 26460 }, { "epoch": 0.770282456293452, "grad_norm": 15.75, "learning_rate": 7.432393974380344e-06, "loss": 1.6551, "step": 26480 }, { "epoch": 0.7708642406259999, "grad_norm": 12.75, "learning_rate": 7.430454692707729e-06, "loss": 1.672, "step": 26500 }, { "epoch": 0.7714460249585479, "grad_norm": 11.875, "learning_rate": 7.428515411035114e-06, "loss": 1.6364, "step": 26520 }, { "epoch": 0.7720278092910958, "grad_norm": 11.25, "learning_rate": 7.426576129362499e-06, "loss": 1.6867, "step": 26540 }, { "epoch": 0.7726095936236437, "grad_norm": 12.375, "learning_rate": 7.424636847689884e-06, "loss": 1.6134, "step": 26560 }, { "epoch": 0.7731913779561916, "grad_norm": 14.1875, "learning_rate": 7.422697566017269e-06, "loss": 1.5726, "step": 26580 }, { "epoch": 0.7737731622887396, "grad_norm": 14.25, "learning_rate": 7.420758284344654e-06, "loss": 1.6205, "step": 26600 }, { "epoch": 0.7743549466212875, "grad_norm": 10.875, "learning_rate": 7.4188190026720395e-06, "loss": 1.6047, "step": 26620 }, { "epoch": 0.7749367309538354, "grad_norm": 13.0, "learning_rate": 7.4168797209994246e-06, "loss": 1.6801, "step": 26640 }, { "epoch": 0.7755185152863834, "grad_norm": 12.75, "learning_rate": 7.41494043932681e-06, "loss": 1.7161, "step": 26660 }, { "epoch": 0.7761002996189312, "grad_norm": 12.1875, "learning_rate": 7.413001157654195e-06, "loss": 1.6308, "step": 26680 }, { "epoch": 0.7766820839514792, "grad_norm": 14.4375, "learning_rate": 7.41106187598158e-06, "loss": 1.5738, "step": 26700 }, { "epoch": 0.7772638682840272, "grad_norm": 11.5, "learning_rate": 7.409122594308965e-06, "loss": 1.6099, "step": 26720 }, { "epoch": 0.777845652616575, "grad_norm": 11.375, "learning_rate": 7.40718331263635e-06, "loss": 1.6528, "step": 26740 }, { "epoch": 0.778427436949123, "grad_norm": 12.75, "learning_rate": 7.405244030963735e-06, "loss": 1.6658, "step": 26760 }, { "epoch": 0.7790092212816709, "grad_norm": 11.5, "learning_rate": 7.40330474929112e-06, "loss": 1.6278, "step": 26780 }, { "epoch": 0.7795910056142188, "grad_norm": 11.25, "learning_rate": 7.401365467618505e-06, "loss": 1.6695, "step": 26800 }, { "epoch": 0.7801727899467668, "grad_norm": 15.0, "learning_rate": 7.3994261859458904e-06, "loss": 1.6491, "step": 26820 }, { "epoch": 0.7807545742793146, "grad_norm": 13.25, "learning_rate": 7.3974869042732755e-06, "loss": 1.6186, "step": 26840 }, { "epoch": 0.7813363586118626, "grad_norm": 12.4375, "learning_rate": 7.395547622600661e-06, "loss": 1.5884, "step": 26860 }, { "epoch": 0.7819181429444105, "grad_norm": 13.25, "learning_rate": 7.393608340928046e-06, "loss": 1.6544, "step": 26880 }, { "epoch": 0.7824999272769584, "grad_norm": 13.5, "learning_rate": 7.391669059255431e-06, "loss": 1.6644, "step": 26900 }, { "epoch": 0.7830817116095063, "grad_norm": 9.5, "learning_rate": 7.389729777582816e-06, "loss": 1.6021, "step": 26920 }, { "epoch": 0.7836634959420543, "grad_norm": 14.0625, "learning_rate": 7.387790495910201e-06, "loss": 1.5777, "step": 26940 }, { "epoch": 0.7842452802746022, "grad_norm": 10.5, "learning_rate": 7.385851214237586e-06, "loss": 1.5951, "step": 26960 }, { "epoch": 0.7848270646071501, "grad_norm": 12.6875, "learning_rate": 7.383911932564971e-06, "loss": 1.5828, "step": 26980 }, { "epoch": 0.7854088489396981, "grad_norm": 14.875, "learning_rate": 7.381972650892356e-06, "loss": 1.7095, "step": 27000 }, { "epoch": 0.7859906332722459, "grad_norm": 12.25, "learning_rate": 7.380033369219741e-06, "loss": 1.642, "step": 27020 }, { "epoch": 0.7865724176047939, "grad_norm": 12.5625, "learning_rate": 7.3780940875471265e-06, "loss": 1.6102, "step": 27040 }, { "epoch": 0.7871542019373419, "grad_norm": 13.1875, "learning_rate": 7.376154805874512e-06, "loss": 1.6857, "step": 27060 }, { "epoch": 0.7877359862698897, "grad_norm": 11.9375, "learning_rate": 7.374215524201897e-06, "loss": 1.6778, "step": 27080 }, { "epoch": 0.7883177706024377, "grad_norm": 15.375, "learning_rate": 7.372276242529282e-06, "loss": 1.6085, "step": 27100 }, { "epoch": 0.7888995549349856, "grad_norm": 13.8125, "learning_rate": 7.370336960856667e-06, "loss": 1.6804, "step": 27120 }, { "epoch": 0.7894813392675335, "grad_norm": 10.0625, "learning_rate": 7.368397679184052e-06, "loss": 1.6082, "step": 27140 }, { "epoch": 0.7900631236000815, "grad_norm": 10.4375, "learning_rate": 7.366458397511437e-06, "loss": 1.6325, "step": 27160 }, { "epoch": 0.7906449079326294, "grad_norm": 12.25, "learning_rate": 7.364519115838822e-06, "loss": 1.6147, "step": 27180 }, { "epoch": 0.7912266922651773, "grad_norm": 11.0, "learning_rate": 7.362579834166207e-06, "loss": 1.6164, "step": 27200 }, { "epoch": 0.7918084765977252, "grad_norm": 10.125, "learning_rate": 7.360640552493592e-06, "loss": 1.7442, "step": 27220 }, { "epoch": 0.7923902609302731, "grad_norm": 12.125, "learning_rate": 7.3587012708209775e-06, "loss": 1.6424, "step": 27240 }, { "epoch": 0.7929720452628211, "grad_norm": 11.5625, "learning_rate": 7.356761989148362e-06, "loss": 1.6828, "step": 27260 }, { "epoch": 0.793553829595369, "grad_norm": 11.8125, "learning_rate": 7.354822707475747e-06, "loss": 1.6025, "step": 27280 }, { "epoch": 0.7941356139279169, "grad_norm": 12.6875, "learning_rate": 7.352883425803132e-06, "loss": 1.6738, "step": 27300 }, { "epoch": 0.7947173982604648, "grad_norm": 13.875, "learning_rate": 7.350944144130517e-06, "loss": 1.5983, "step": 27320 }, { "epoch": 0.7952991825930128, "grad_norm": 10.75, "learning_rate": 7.349004862457902e-06, "loss": 1.6961, "step": 27340 }, { "epoch": 0.7958809669255607, "grad_norm": 12.5625, "learning_rate": 7.347065580785287e-06, "loss": 1.6454, "step": 27360 }, { "epoch": 0.7964627512581086, "grad_norm": 14.25, "learning_rate": 7.345126299112672e-06, "loss": 1.6563, "step": 27380 }, { "epoch": 0.7970445355906566, "grad_norm": 14.125, "learning_rate": 7.343187017440057e-06, "loss": 1.6725, "step": 27400 }, { "epoch": 0.7976263199232044, "grad_norm": 9.8125, "learning_rate": 7.3412477357674425e-06, "loss": 1.7038, "step": 27420 }, { "epoch": 0.7982081042557524, "grad_norm": 12.125, "learning_rate": 7.339308454094828e-06, "loss": 1.6789, "step": 27440 }, { "epoch": 0.7987898885883004, "grad_norm": 11.625, "learning_rate": 7.337369172422213e-06, "loss": 1.5602, "step": 27460 }, { "epoch": 0.7993716729208482, "grad_norm": 11.125, "learning_rate": 7.335429890749598e-06, "loss": 1.6001, "step": 27480 }, { "epoch": 0.7999534572533962, "grad_norm": 12.1875, "learning_rate": 7.333490609076983e-06, "loss": 1.6053, "step": 27500 }, { "epoch": 0.8005352415859441, "grad_norm": 12.8125, "learning_rate": 7.331551327404368e-06, "loss": 1.5731, "step": 27520 }, { "epoch": 0.801117025918492, "grad_norm": 18.25, "learning_rate": 7.329612045731753e-06, "loss": 1.6737, "step": 27540 }, { "epoch": 0.80169881025104, "grad_norm": 13.8125, "learning_rate": 7.327672764059138e-06, "loss": 1.7213, "step": 27560 }, { "epoch": 0.8022805945835879, "grad_norm": 11.25, "learning_rate": 7.325733482386523e-06, "loss": 1.699, "step": 27580 }, { "epoch": 0.8028623789161358, "grad_norm": 16.75, "learning_rate": 7.323794200713908e-06, "loss": 1.655, "step": 27600 }, { "epoch": 0.8034441632486837, "grad_norm": 13.125, "learning_rate": 7.3218549190412935e-06, "loss": 1.6234, "step": 27620 }, { "epoch": 0.8040259475812317, "grad_norm": 14.4375, "learning_rate": 7.319915637368679e-06, "loss": 1.6227, "step": 27640 }, { "epoch": 0.8046077319137795, "grad_norm": 10.25, "learning_rate": 7.317976355696064e-06, "loss": 1.6854, "step": 27660 }, { "epoch": 0.8051895162463275, "grad_norm": 10.875, "learning_rate": 7.316037074023449e-06, "loss": 1.6491, "step": 27680 }, { "epoch": 0.8057713005788754, "grad_norm": 11.75, "learning_rate": 7.314097792350834e-06, "loss": 1.5881, "step": 27700 }, { "epoch": 0.8063530849114233, "grad_norm": 14.5, "learning_rate": 7.312158510678219e-06, "loss": 1.6704, "step": 27720 }, { "epoch": 0.8069348692439713, "grad_norm": 13.5625, "learning_rate": 7.310219229005604e-06, "loss": 1.5579, "step": 27740 }, { "epoch": 0.8075166535765191, "grad_norm": 14.6875, "learning_rate": 7.308279947332989e-06, "loss": 1.4641, "step": 27760 }, { "epoch": 0.8080984379090671, "grad_norm": 14.25, "learning_rate": 7.306340665660374e-06, "loss": 1.565, "step": 27780 }, { "epoch": 0.8086802222416151, "grad_norm": 13.125, "learning_rate": 7.304401383987759e-06, "loss": 1.6324, "step": 27800 }, { "epoch": 0.8092620065741629, "grad_norm": 13.0, "learning_rate": 7.3024621023151445e-06, "loss": 1.6607, "step": 27820 }, { "epoch": 0.8098437909067109, "grad_norm": 12.75, "learning_rate": 7.3005228206425296e-06, "loss": 1.6168, "step": 27840 }, { "epoch": 0.8104255752392588, "grad_norm": 12.125, "learning_rate": 7.298583538969915e-06, "loss": 1.6173, "step": 27860 }, { "epoch": 0.8110073595718067, "grad_norm": 10.6875, "learning_rate": 7.2966442572973e-06, "loss": 1.6901, "step": 27880 }, { "epoch": 0.8115891439043547, "grad_norm": 12.4375, "learning_rate": 7.294704975624685e-06, "loss": 1.6655, "step": 27900 }, { "epoch": 0.8121709282369026, "grad_norm": 12.8125, "learning_rate": 7.29276569395207e-06, "loss": 1.5395, "step": 27920 }, { "epoch": 0.8127527125694505, "grad_norm": 13.25, "learning_rate": 7.290826412279455e-06, "loss": 1.4986, "step": 27940 }, { "epoch": 0.8133344969019984, "grad_norm": 11.6875, "learning_rate": 7.28888713060684e-06, "loss": 1.6638, "step": 27960 }, { "epoch": 0.8139162812345464, "grad_norm": 11.5, "learning_rate": 7.286947848934225e-06, "loss": 1.5842, "step": 27980 }, { "epoch": 0.8144980655670943, "grad_norm": 12.75, "learning_rate": 7.28500856726161e-06, "loss": 1.6284, "step": 28000 }, { "epoch": 0.8150798498996422, "grad_norm": 10.25, "learning_rate": 7.2830692855889954e-06, "loss": 1.6638, "step": 28020 }, { "epoch": 0.8156616342321902, "grad_norm": 13.125, "learning_rate": 7.2811300039163805e-06, "loss": 1.7014, "step": 28040 }, { "epoch": 0.816243418564738, "grad_norm": 12.125, "learning_rate": 7.279190722243766e-06, "loss": 1.6751, "step": 28060 }, { "epoch": 0.816825202897286, "grad_norm": 14.0, "learning_rate": 7.27725144057115e-06, "loss": 1.651, "step": 28080 }, { "epoch": 0.8174069872298338, "grad_norm": 13.25, "learning_rate": 7.275312158898535e-06, "loss": 1.5778, "step": 28100 }, { "epoch": 0.8179887715623818, "grad_norm": 15.1875, "learning_rate": 7.27337287722592e-06, "loss": 1.6771, "step": 28120 }, { "epoch": 0.8185705558949298, "grad_norm": 12.375, "learning_rate": 7.271433595553305e-06, "loss": 1.6015, "step": 28140 }, { "epoch": 0.8191523402274776, "grad_norm": 12.625, "learning_rate": 7.26949431388069e-06, "loss": 1.6831, "step": 28160 }, { "epoch": 0.8197341245600256, "grad_norm": 9.8125, "learning_rate": 7.267555032208075e-06, "loss": 1.6934, "step": 28180 }, { "epoch": 0.8203159088925736, "grad_norm": 13.9375, "learning_rate": 7.2656157505354605e-06, "loss": 1.6941, "step": 28200 }, { "epoch": 0.8208976932251214, "grad_norm": 11.875, "learning_rate": 7.2636764688628456e-06, "loss": 1.5997, "step": 28220 }, { "epoch": 0.8214794775576694, "grad_norm": 16.5, "learning_rate": 7.261737187190231e-06, "loss": 1.6822, "step": 28240 }, { "epoch": 0.8220612618902173, "grad_norm": 11.375, "learning_rate": 7.259797905517616e-06, "loss": 1.4695, "step": 28260 }, { "epoch": 0.8226430462227652, "grad_norm": 17.0, "learning_rate": 7.257858623845001e-06, "loss": 1.6311, "step": 28280 }, { "epoch": 0.8232248305553131, "grad_norm": 10.5, "learning_rate": 7.255919342172386e-06, "loss": 1.6257, "step": 28300 }, { "epoch": 0.8238066148878611, "grad_norm": 13.5, "learning_rate": 7.253980060499771e-06, "loss": 1.6623, "step": 28320 }, { "epoch": 0.824388399220409, "grad_norm": 12.4375, "learning_rate": 7.252040778827156e-06, "loss": 1.7535, "step": 28340 }, { "epoch": 0.8249701835529569, "grad_norm": 11.5, "learning_rate": 7.250101497154541e-06, "loss": 1.6129, "step": 28360 }, { "epoch": 0.8255519678855049, "grad_norm": 13.375, "learning_rate": 7.248162215481926e-06, "loss": 1.6641, "step": 28380 }, { "epoch": 0.8261337522180527, "grad_norm": 14.0625, "learning_rate": 7.2462229338093114e-06, "loss": 1.6582, "step": 28400 }, { "epoch": 0.8267155365506007, "grad_norm": 10.75, "learning_rate": 7.2442836521366965e-06, "loss": 1.5584, "step": 28420 }, { "epoch": 0.8272973208831487, "grad_norm": 12.625, "learning_rate": 7.242344370464082e-06, "loss": 1.6738, "step": 28440 }, { "epoch": 0.8278791052156965, "grad_norm": 13.375, "learning_rate": 7.240405088791467e-06, "loss": 1.6583, "step": 28460 }, { "epoch": 0.8284608895482445, "grad_norm": 13.75, "learning_rate": 7.238465807118852e-06, "loss": 1.5918, "step": 28480 }, { "epoch": 0.8290426738807923, "grad_norm": 16.0, "learning_rate": 7.236526525446237e-06, "loss": 1.6061, "step": 28500 }, { "epoch": 0.8296244582133403, "grad_norm": 12.0625, "learning_rate": 7.234587243773622e-06, "loss": 1.6719, "step": 28520 }, { "epoch": 0.8302062425458883, "grad_norm": 13.0, "learning_rate": 7.232647962101007e-06, "loss": 1.5677, "step": 28540 }, { "epoch": 0.8307880268784361, "grad_norm": 13.1875, "learning_rate": 7.230708680428392e-06, "loss": 1.6761, "step": 28560 }, { "epoch": 0.8313698112109841, "grad_norm": 15.5, "learning_rate": 7.228769398755777e-06, "loss": 1.6747, "step": 28580 }, { "epoch": 0.831951595543532, "grad_norm": 11.375, "learning_rate": 7.226830117083162e-06, "loss": 1.683, "step": 28600 }, { "epoch": 0.8325333798760799, "grad_norm": 12.4375, "learning_rate": 7.2248908354105475e-06, "loss": 1.6348, "step": 28620 }, { "epoch": 0.8331151642086279, "grad_norm": 21.625, "learning_rate": 7.222951553737933e-06, "loss": 1.5586, "step": 28640 }, { "epoch": 0.8336969485411758, "grad_norm": 11.5, "learning_rate": 7.221012272065318e-06, "loss": 1.6519, "step": 28660 }, { "epoch": 0.8342787328737237, "grad_norm": 11.0, "learning_rate": 7.219072990392703e-06, "loss": 1.6271, "step": 28680 }, { "epoch": 0.8348605172062716, "grad_norm": 12.375, "learning_rate": 7.217133708720088e-06, "loss": 1.6389, "step": 28700 }, { "epoch": 0.8354423015388196, "grad_norm": 11.9375, "learning_rate": 7.215194427047473e-06, "loss": 1.7004, "step": 28720 }, { "epoch": 0.8360240858713675, "grad_norm": 13.0625, "learning_rate": 7.213255145374858e-06, "loss": 1.6365, "step": 28740 }, { "epoch": 0.8366058702039154, "grad_norm": 12.9375, "learning_rate": 7.211315863702243e-06, "loss": 1.6538, "step": 28760 }, { "epoch": 0.8371876545364634, "grad_norm": 10.625, "learning_rate": 7.209376582029628e-06, "loss": 1.6667, "step": 28780 }, { "epoch": 0.8377694388690112, "grad_norm": 13.25, "learning_rate": 7.207437300357013e-06, "loss": 1.5666, "step": 28800 }, { "epoch": 0.8383512232015592, "grad_norm": 14.4375, "learning_rate": 7.2054980186843985e-06, "loss": 1.6309, "step": 28820 }, { "epoch": 0.8389330075341072, "grad_norm": 13.3125, "learning_rate": 7.203558737011784e-06, "loss": 1.622, "step": 28840 }, { "epoch": 0.839514791866655, "grad_norm": 13.625, "learning_rate": 7.201619455339169e-06, "loss": 1.5475, "step": 28860 }, { "epoch": 0.840096576199203, "grad_norm": 12.5, "learning_rate": 7.199680173666554e-06, "loss": 1.6515, "step": 28880 }, { "epoch": 0.8406783605317509, "grad_norm": 10.25, "learning_rate": 7.197740891993938e-06, "loss": 1.5814, "step": 28900 }, { "epoch": 0.8412601448642988, "grad_norm": 9.1875, "learning_rate": 7.195801610321323e-06, "loss": 1.6171, "step": 28920 }, { "epoch": 0.8418419291968467, "grad_norm": 13.0, "learning_rate": 7.193862328648708e-06, "loss": 1.6931, "step": 28940 }, { "epoch": 0.8424237135293946, "grad_norm": 13.875, "learning_rate": 7.191923046976093e-06, "loss": 1.6066, "step": 28960 }, { "epoch": 0.8430054978619426, "grad_norm": 11.75, "learning_rate": 7.189983765303478e-06, "loss": 1.5293, "step": 28980 }, { "epoch": 0.8435872821944905, "grad_norm": 13.5, "learning_rate": 7.1880444836308635e-06, "loss": 1.5974, "step": 29000 }, { "epoch": 0.8441690665270384, "grad_norm": 12.25, "learning_rate": 7.186105201958249e-06, "loss": 1.6792, "step": 29020 }, { "epoch": 0.8447508508595863, "grad_norm": 11.0625, "learning_rate": 7.184165920285634e-06, "loss": 1.6707, "step": 29040 }, { "epoch": 0.8453326351921343, "grad_norm": 11.5, "learning_rate": 7.182226638613018e-06, "loss": 1.5823, "step": 29060 }, { "epoch": 0.8459144195246822, "grad_norm": 14.875, "learning_rate": 7.180287356940403e-06, "loss": 1.6895, "step": 29080 }, { "epoch": 0.8464962038572301, "grad_norm": 13.8125, "learning_rate": 7.178348075267788e-06, "loss": 1.5561, "step": 29100 }, { "epoch": 0.8470779881897781, "grad_norm": 14.3125, "learning_rate": 7.176408793595173e-06, "loss": 1.6273, "step": 29120 }, { "epoch": 0.8476597725223259, "grad_norm": 12.8125, "learning_rate": 7.174469511922558e-06, "loss": 1.6924, "step": 29140 }, { "epoch": 0.8482415568548739, "grad_norm": 11.6875, "learning_rate": 7.1725302302499434e-06, "loss": 1.5519, "step": 29160 }, { "epoch": 0.8488233411874219, "grad_norm": 12.0625, "learning_rate": 7.1705909485773285e-06, "loss": 1.7582, "step": 29180 }, { "epoch": 0.8494051255199697, "grad_norm": 12.4375, "learning_rate": 7.168651666904714e-06, "loss": 1.6532, "step": 29200 }, { "epoch": 0.8499869098525177, "grad_norm": 11.875, "learning_rate": 7.166712385232099e-06, "loss": 1.6266, "step": 29220 }, { "epoch": 0.8505686941850656, "grad_norm": 13.1875, "learning_rate": 7.164773103559484e-06, "loss": 1.6186, "step": 29240 }, { "epoch": 0.8511504785176135, "grad_norm": 11.0625, "learning_rate": 7.162833821886869e-06, "loss": 1.5315, "step": 29260 }, { "epoch": 0.8517322628501615, "grad_norm": 12.375, "learning_rate": 7.160894540214254e-06, "loss": 1.5611, "step": 29280 }, { "epoch": 0.8523140471827094, "grad_norm": 12.125, "learning_rate": 7.158955258541639e-06, "loss": 1.56, "step": 29300 }, { "epoch": 0.8528958315152573, "grad_norm": 12.5625, "learning_rate": 7.157015976869024e-06, "loss": 1.6365, "step": 29320 }, { "epoch": 0.8534776158478052, "grad_norm": 13.5, "learning_rate": 7.155076695196409e-06, "loss": 1.7178, "step": 29340 }, { "epoch": 0.8540594001803531, "grad_norm": 15.1875, "learning_rate": 7.153137413523794e-06, "loss": 1.7602, "step": 29360 }, { "epoch": 0.854641184512901, "grad_norm": 12.6875, "learning_rate": 7.1511981318511795e-06, "loss": 1.6393, "step": 29380 }, { "epoch": 0.855222968845449, "grad_norm": 10.6875, "learning_rate": 7.149258850178565e-06, "loss": 1.7082, "step": 29400 }, { "epoch": 0.8558047531779969, "grad_norm": 11.75, "learning_rate": 7.14731956850595e-06, "loss": 1.6048, "step": 29420 }, { "epoch": 0.8563865375105448, "grad_norm": 15.1875, "learning_rate": 7.145380286833335e-06, "loss": 1.601, "step": 29440 }, { "epoch": 0.8569683218430928, "grad_norm": 12.125, "learning_rate": 7.143441005160719e-06, "loss": 1.6411, "step": 29460 }, { "epoch": 0.8575501061756406, "grad_norm": 14.9375, "learning_rate": 7.141501723488104e-06, "loss": 1.6465, "step": 29480 }, { "epoch": 0.8581318905081886, "grad_norm": 12.4375, "learning_rate": 7.139562441815489e-06, "loss": 1.6785, "step": 29500 }, { "epoch": 0.8587136748407366, "grad_norm": 12.5625, "learning_rate": 7.137623160142874e-06, "loss": 1.6321, "step": 29520 }, { "epoch": 0.8592954591732844, "grad_norm": 12.9375, "learning_rate": 7.1356838784702595e-06, "loss": 1.6339, "step": 29540 }, { "epoch": 0.8598772435058324, "grad_norm": 14.375, "learning_rate": 7.1337445967976445e-06, "loss": 1.6174, "step": 29560 }, { "epoch": 0.8604590278383804, "grad_norm": 11.5, "learning_rate": 7.13180531512503e-06, "loss": 1.5598, "step": 29580 }, { "epoch": 0.8610408121709282, "grad_norm": 8.625, "learning_rate": 7.129866033452415e-06, "loss": 1.6395, "step": 29600 }, { "epoch": 0.8616225965034762, "grad_norm": 14.0625, "learning_rate": 7.1279267517798e-06, "loss": 1.779, "step": 29620 }, { "epoch": 0.8622043808360241, "grad_norm": 12.375, "learning_rate": 7.125987470107185e-06, "loss": 1.6451, "step": 29640 }, { "epoch": 0.862786165168572, "grad_norm": 14.0625, "learning_rate": 7.12404818843457e-06, "loss": 1.523, "step": 29660 }, { "epoch": 0.86336794950112, "grad_norm": 12.4375, "learning_rate": 7.122108906761955e-06, "loss": 1.6644, "step": 29680 }, { "epoch": 0.8639497338336679, "grad_norm": 13.75, "learning_rate": 7.12016962508934e-06, "loss": 1.6535, "step": 29700 }, { "epoch": 0.8645315181662158, "grad_norm": 11.8125, "learning_rate": 7.118230343416725e-06, "loss": 1.6443, "step": 29720 }, { "epoch": 0.8651133024987637, "grad_norm": 13.75, "learning_rate": 7.1162910617441104e-06, "loss": 1.5852, "step": 29740 }, { "epoch": 0.8656950868313116, "grad_norm": 9.8125, "learning_rate": 7.1143517800714955e-06, "loss": 1.5577, "step": 29760 }, { "epoch": 0.8662768711638595, "grad_norm": 14.8125, "learning_rate": 7.112412498398881e-06, "loss": 1.5347, "step": 29780 }, { "epoch": 0.8668586554964075, "grad_norm": 11.1875, "learning_rate": 7.110473216726266e-06, "loss": 1.6938, "step": 29800 }, { "epoch": 0.8674404398289554, "grad_norm": 12.1875, "learning_rate": 7.108533935053651e-06, "loss": 1.5838, "step": 29820 }, { "epoch": 0.8680222241615033, "grad_norm": 12.1875, "learning_rate": 7.106594653381036e-06, "loss": 1.654, "step": 29840 }, { "epoch": 0.8686040084940513, "grad_norm": 10.8125, "learning_rate": 7.104655371708421e-06, "loss": 1.6771, "step": 29860 }, { "epoch": 0.8691857928265991, "grad_norm": 14.25, "learning_rate": 7.102716090035806e-06, "loss": 1.7066, "step": 29880 }, { "epoch": 0.8697675771591471, "grad_norm": 12.1875, "learning_rate": 7.100776808363191e-06, "loss": 1.6345, "step": 29900 }, { "epoch": 0.8703493614916951, "grad_norm": 13.375, "learning_rate": 7.098837526690576e-06, "loss": 1.5612, "step": 29920 }, { "epoch": 0.8709311458242429, "grad_norm": 12.0625, "learning_rate": 7.096898245017961e-06, "loss": 1.7013, "step": 29940 }, { "epoch": 0.8715129301567909, "grad_norm": 13.8125, "learning_rate": 7.0949589633453465e-06, "loss": 1.6588, "step": 29960 }, { "epoch": 0.8720947144893388, "grad_norm": 11.125, "learning_rate": 7.093019681672732e-06, "loss": 1.6883, "step": 29980 }, { "epoch": 0.8726764988218867, "grad_norm": 14.875, "learning_rate": 7.091080400000117e-06, "loss": 1.657, "step": 30000 }, { "epoch": 0.8732582831544347, "grad_norm": 10.375, "learning_rate": 7.089141118327502e-06, "loss": 1.6111, "step": 30020 }, { "epoch": 0.8738400674869826, "grad_norm": 10.5625, "learning_rate": 7.087201836654887e-06, "loss": 1.6375, "step": 30040 }, { "epoch": 0.8744218518195305, "grad_norm": 11.0625, "learning_rate": 7.085262554982272e-06, "loss": 1.5596, "step": 30060 }, { "epoch": 0.8750036361520784, "grad_norm": 9.6875, "learning_rate": 7.083323273309657e-06, "loss": 1.6095, "step": 30080 }, { "epoch": 0.8755854204846264, "grad_norm": 14.4375, "learning_rate": 7.081383991637042e-06, "loss": 1.5589, "step": 30100 }, { "epoch": 0.8761672048171742, "grad_norm": 12.125, "learning_rate": 7.079444709964427e-06, "loss": 1.5975, "step": 30120 }, { "epoch": 0.8767489891497222, "grad_norm": 12.375, "learning_rate": 7.077505428291812e-06, "loss": 1.6044, "step": 30140 }, { "epoch": 0.8773307734822702, "grad_norm": 10.6875, "learning_rate": 7.0755661466191975e-06, "loss": 1.6167, "step": 30160 }, { "epoch": 0.877912557814818, "grad_norm": 12.0, "learning_rate": 7.0736268649465826e-06, "loss": 1.6748, "step": 30180 }, { "epoch": 0.878494342147366, "grad_norm": 10.75, "learning_rate": 7.071687583273968e-06, "loss": 1.651, "step": 30200 }, { "epoch": 0.8790761264799138, "grad_norm": 13.25, "learning_rate": 7.069748301601353e-06, "loss": 1.7351, "step": 30220 }, { "epoch": 0.8796579108124618, "grad_norm": 12.5625, "learning_rate": 7.067809019928738e-06, "loss": 1.5913, "step": 30240 }, { "epoch": 0.8802396951450098, "grad_norm": 16.5, "learning_rate": 7.065869738256123e-06, "loss": 1.5976, "step": 30260 }, { "epoch": 0.8808214794775576, "grad_norm": 10.5625, "learning_rate": 7.063930456583507e-06, "loss": 1.712, "step": 30280 }, { "epoch": 0.8814032638101056, "grad_norm": 8.625, "learning_rate": 7.061991174910892e-06, "loss": 1.7202, "step": 30300 }, { "epoch": 0.8819850481426535, "grad_norm": 11.875, "learning_rate": 7.060051893238277e-06, "loss": 1.6646, "step": 30320 }, { "epoch": 0.8825668324752014, "grad_norm": 11.4375, "learning_rate": 7.0581126115656625e-06, "loss": 1.6245, "step": 30340 }, { "epoch": 0.8831486168077494, "grad_norm": 13.75, "learning_rate": 7.056173329893048e-06, "loss": 1.6251, "step": 30360 }, { "epoch": 0.8837304011402973, "grad_norm": 12.625, "learning_rate": 7.054234048220433e-06, "loss": 1.6073, "step": 30380 }, { "epoch": 0.8843121854728452, "grad_norm": 12.875, "learning_rate": 7.052294766547818e-06, "loss": 1.6597, "step": 30400 }, { "epoch": 0.8848939698053931, "grad_norm": 13.875, "learning_rate": 7.050355484875203e-06, "loss": 1.5641, "step": 30420 }, { "epoch": 0.8854757541379411, "grad_norm": 13.0625, "learning_rate": 7.048416203202588e-06, "loss": 1.5914, "step": 30440 }, { "epoch": 0.886057538470489, "grad_norm": 11.875, "learning_rate": 7.046476921529973e-06, "loss": 1.6748, "step": 30460 }, { "epoch": 0.8866393228030369, "grad_norm": 12.0625, "learning_rate": 7.044537639857358e-06, "loss": 1.6939, "step": 30480 }, { "epoch": 0.8872211071355849, "grad_norm": 11.0, "learning_rate": 7.042598358184743e-06, "loss": 1.5961, "step": 30500 }, { "epoch": 0.8878028914681327, "grad_norm": 12.625, "learning_rate": 7.040659076512128e-06, "loss": 1.6494, "step": 30520 }, { "epoch": 0.8883846758006807, "grad_norm": 11.25, "learning_rate": 7.0387197948395135e-06, "loss": 1.6075, "step": 30540 }, { "epoch": 0.8889664601332287, "grad_norm": 10.875, "learning_rate": 7.0367805131668986e-06, "loss": 1.6193, "step": 30560 }, { "epoch": 0.8895482444657765, "grad_norm": 15.5625, "learning_rate": 7.034841231494284e-06, "loss": 1.5741, "step": 30580 }, { "epoch": 0.8901300287983245, "grad_norm": 13.375, "learning_rate": 7.032901949821669e-06, "loss": 1.6317, "step": 30600 }, { "epoch": 0.8907118131308723, "grad_norm": 12.1875, "learning_rate": 7.030962668149054e-06, "loss": 1.6391, "step": 30620 }, { "epoch": 0.8912935974634203, "grad_norm": 12.25, "learning_rate": 7.029023386476439e-06, "loss": 1.6586, "step": 30640 }, { "epoch": 0.8918753817959683, "grad_norm": 11.75, "learning_rate": 7.027084104803824e-06, "loss": 1.7167, "step": 30660 }, { "epoch": 0.8924571661285161, "grad_norm": 14.0625, "learning_rate": 7.025144823131209e-06, "loss": 1.6195, "step": 30680 }, { "epoch": 0.8930389504610641, "grad_norm": 11.1875, "learning_rate": 7.023205541458594e-06, "loss": 1.6202, "step": 30700 }, { "epoch": 0.893620734793612, "grad_norm": 11.1875, "learning_rate": 7.021266259785979e-06, "loss": 1.7246, "step": 30720 }, { "epoch": 0.8942025191261599, "grad_norm": 14.5, "learning_rate": 7.0193269781133645e-06, "loss": 1.6541, "step": 30740 }, { "epoch": 0.8947843034587079, "grad_norm": 14.125, "learning_rate": 7.0173876964407496e-06, "loss": 1.7151, "step": 30760 }, { "epoch": 0.8953660877912558, "grad_norm": 12.5625, "learning_rate": 7.015448414768135e-06, "loss": 1.6388, "step": 30780 }, { "epoch": 0.8959478721238037, "grad_norm": 12.9375, "learning_rate": 7.01350913309552e-06, "loss": 1.6886, "step": 30800 }, { "epoch": 0.8965296564563516, "grad_norm": 14.5, "learning_rate": 7.011569851422905e-06, "loss": 1.7274, "step": 30820 }, { "epoch": 0.8971114407888996, "grad_norm": 11.375, "learning_rate": 7.00963056975029e-06, "loss": 1.5491, "step": 30840 }, { "epoch": 0.8976932251214474, "grad_norm": 11.75, "learning_rate": 7.007691288077675e-06, "loss": 1.7166, "step": 30860 }, { "epoch": 0.8982750094539954, "grad_norm": 11.0, "learning_rate": 7.00575200640506e-06, "loss": 1.4878, "step": 30880 }, { "epoch": 0.8988567937865434, "grad_norm": 14.5, "learning_rate": 7.003812724732445e-06, "loss": 1.4887, "step": 30900 }, { "epoch": 0.8994385781190912, "grad_norm": 12.375, "learning_rate": 7.00187344305983e-06, "loss": 1.6791, "step": 30920 }, { "epoch": 0.9000203624516392, "grad_norm": 14.0, "learning_rate": 6.9999341613872154e-06, "loss": 1.6454, "step": 30940 }, { "epoch": 0.9006021467841872, "grad_norm": 12.375, "learning_rate": 6.9979948797146005e-06, "loss": 1.5783, "step": 30960 }, { "epoch": 0.901183931116735, "grad_norm": 13.5625, "learning_rate": 6.996055598041986e-06, "loss": 1.6508, "step": 30980 }, { "epoch": 0.901765715449283, "grad_norm": 15.4375, "learning_rate": 6.994116316369371e-06, "loss": 1.6058, "step": 31000 }, { "epoch": 0.9023474997818309, "grad_norm": 11.4375, "learning_rate": 6.992177034696756e-06, "loss": 1.6019, "step": 31020 }, { "epoch": 0.9029292841143788, "grad_norm": 11.125, "learning_rate": 6.990237753024141e-06, "loss": 1.7452, "step": 31040 }, { "epoch": 0.9035110684469267, "grad_norm": 11.6875, "learning_rate": 6.988298471351526e-06, "loss": 1.6808, "step": 31060 }, { "epoch": 0.9040928527794746, "grad_norm": 13.125, "learning_rate": 6.986359189678911e-06, "loss": 1.6858, "step": 31080 }, { "epoch": 0.9046746371120226, "grad_norm": 13.625, "learning_rate": 6.984419908006295e-06, "loss": 1.7046, "step": 31100 }, { "epoch": 0.9052564214445705, "grad_norm": 12.1875, "learning_rate": 6.9824806263336805e-06, "loss": 1.6436, "step": 31120 }, { "epoch": 0.9058382057771184, "grad_norm": 11.0, "learning_rate": 6.9805413446610656e-06, "loss": 1.7363, "step": 31140 }, { "epoch": 0.9064199901096663, "grad_norm": 12.125, "learning_rate": 6.978602062988451e-06, "loss": 1.6176, "step": 31160 }, { "epoch": 0.9070017744422143, "grad_norm": 12.3125, "learning_rate": 6.976662781315836e-06, "loss": 1.6335, "step": 31180 }, { "epoch": 0.9075835587747622, "grad_norm": 11.8125, "learning_rate": 6.974723499643221e-06, "loss": 1.5611, "step": 31200 }, { "epoch": 0.9081653431073101, "grad_norm": 10.6875, "learning_rate": 6.972784217970606e-06, "loss": 1.6538, "step": 31220 }, { "epoch": 0.9087471274398581, "grad_norm": 12.3125, "learning_rate": 6.970844936297991e-06, "loss": 1.6281, "step": 31240 }, { "epoch": 0.9093289117724059, "grad_norm": 12.1875, "learning_rate": 6.968905654625376e-06, "loss": 1.7004, "step": 31260 }, { "epoch": 0.9099106961049539, "grad_norm": 12.6875, "learning_rate": 6.966966372952761e-06, "loss": 1.7177, "step": 31280 }, { "epoch": 0.9104924804375019, "grad_norm": 11.1875, "learning_rate": 6.965027091280146e-06, "loss": 1.591, "step": 31300 }, { "epoch": 0.9110742647700497, "grad_norm": 8.375, "learning_rate": 6.9630878096075314e-06, "loss": 1.604, "step": 31320 }, { "epoch": 0.9116560491025977, "grad_norm": 12.125, "learning_rate": 6.9611485279349165e-06, "loss": 1.5641, "step": 31340 }, { "epoch": 0.9122378334351456, "grad_norm": 14.125, "learning_rate": 6.959209246262302e-06, "loss": 1.6305, "step": 31360 }, { "epoch": 0.9128196177676935, "grad_norm": 12.375, "learning_rate": 6.957269964589687e-06, "loss": 1.6007, "step": 31380 }, { "epoch": 0.9134014021002415, "grad_norm": 14.0625, "learning_rate": 6.955330682917072e-06, "loss": 1.7345, "step": 31400 }, { "epoch": 0.9139831864327894, "grad_norm": 14.3125, "learning_rate": 6.953391401244457e-06, "loss": 1.6524, "step": 31420 }, { "epoch": 0.9145649707653373, "grad_norm": 12.875, "learning_rate": 6.951452119571842e-06, "loss": 1.5854, "step": 31440 }, { "epoch": 0.9151467550978852, "grad_norm": 10.9375, "learning_rate": 6.949512837899227e-06, "loss": 1.5793, "step": 31460 }, { "epoch": 0.9157285394304331, "grad_norm": 11.625, "learning_rate": 6.947573556226612e-06, "loss": 1.6801, "step": 31480 }, { "epoch": 0.916310323762981, "grad_norm": 9.5, "learning_rate": 6.945634274553997e-06, "loss": 1.64, "step": 31500 }, { "epoch": 0.916892108095529, "grad_norm": 11.8125, "learning_rate": 6.943694992881382e-06, "loss": 1.5614, "step": 31520 }, { "epoch": 0.9174738924280769, "grad_norm": 14.1875, "learning_rate": 6.9417557112087675e-06, "loss": 1.6406, "step": 31540 }, { "epoch": 0.9180556767606248, "grad_norm": 14.1875, "learning_rate": 6.939816429536153e-06, "loss": 1.6144, "step": 31560 }, { "epoch": 0.9186374610931728, "grad_norm": 13.875, "learning_rate": 6.937877147863538e-06, "loss": 1.6121, "step": 31580 }, { "epoch": 0.9192192454257206, "grad_norm": 11.5, "learning_rate": 6.935937866190923e-06, "loss": 1.5784, "step": 31600 }, { "epoch": 0.9198010297582686, "grad_norm": 15.5, "learning_rate": 6.933998584518308e-06, "loss": 1.506, "step": 31620 }, { "epoch": 0.9203828140908166, "grad_norm": 11.9375, "learning_rate": 6.932059302845693e-06, "loss": 1.6829, "step": 31640 }, { "epoch": 0.9209645984233644, "grad_norm": 14.625, "learning_rate": 6.930120021173078e-06, "loss": 1.7056, "step": 31660 }, { "epoch": 0.9215463827559124, "grad_norm": 11.9375, "learning_rate": 6.928180739500463e-06, "loss": 1.6337, "step": 31680 }, { "epoch": 0.9221281670884603, "grad_norm": 10.0, "learning_rate": 6.926241457827848e-06, "loss": 1.6357, "step": 31700 }, { "epoch": 0.9227099514210082, "grad_norm": 12.5, "learning_rate": 6.924302176155233e-06, "loss": 1.6074, "step": 31720 }, { "epoch": 0.9232917357535562, "grad_norm": 12.625, "learning_rate": 6.9223628944826185e-06, "loss": 1.7342, "step": 31740 }, { "epoch": 0.9238735200861041, "grad_norm": 14.0, "learning_rate": 6.920423612810004e-06, "loss": 1.712, "step": 31760 }, { "epoch": 0.924455304418652, "grad_norm": 24.375, "learning_rate": 6.918484331137389e-06, "loss": 1.6576, "step": 31780 }, { "epoch": 0.9250370887511999, "grad_norm": 10.875, "learning_rate": 6.916545049464774e-06, "loss": 1.6502, "step": 31800 }, { "epoch": 0.9256188730837479, "grad_norm": 10.625, "learning_rate": 6.914605767792159e-06, "loss": 1.5984, "step": 31820 }, { "epoch": 0.9262006574162958, "grad_norm": 11.5, "learning_rate": 6.912666486119544e-06, "loss": 1.5611, "step": 31840 }, { "epoch": 0.9267824417488437, "grad_norm": 12.5625, "learning_rate": 6.910727204446929e-06, "loss": 1.5556, "step": 31860 }, { "epoch": 0.9273642260813916, "grad_norm": 12.0, "learning_rate": 6.908787922774314e-06, "loss": 1.591, "step": 31880 }, { "epoch": 0.9279460104139395, "grad_norm": 13.4375, "learning_rate": 6.906848641101699e-06, "loss": 1.6348, "step": 31900 }, { "epoch": 0.9285277947464875, "grad_norm": 12.25, "learning_rate": 6.9049093594290835e-06, "loss": 1.6385, "step": 31920 }, { "epoch": 0.9291095790790354, "grad_norm": 12.5625, "learning_rate": 6.902970077756469e-06, "loss": 1.7238, "step": 31940 }, { "epoch": 0.9296913634115833, "grad_norm": 12.8125, "learning_rate": 6.901030796083854e-06, "loss": 1.5489, "step": 31960 }, { "epoch": 0.9302731477441313, "grad_norm": 13.75, "learning_rate": 6.899091514411239e-06, "loss": 1.647, "step": 31980 }, { "epoch": 0.9308549320766791, "grad_norm": 13.75, "learning_rate": 6.897152232738624e-06, "loss": 1.6397, "step": 32000 }, { "epoch": 0.9314367164092271, "grad_norm": 13.25, "learning_rate": 6.895212951066009e-06, "loss": 1.6137, "step": 32020 }, { "epoch": 0.9320185007417751, "grad_norm": 10.0, "learning_rate": 6.893273669393394e-06, "loss": 1.6826, "step": 32040 }, { "epoch": 0.9326002850743229, "grad_norm": 13.9375, "learning_rate": 6.891334387720779e-06, "loss": 1.6708, "step": 32060 }, { "epoch": 0.9331820694068709, "grad_norm": 10.9375, "learning_rate": 6.889395106048164e-06, "loss": 1.5909, "step": 32080 }, { "epoch": 0.9337638537394188, "grad_norm": 13.0, "learning_rate": 6.887455824375549e-06, "loss": 1.5761, "step": 32100 }, { "epoch": 0.9343456380719667, "grad_norm": 11.0625, "learning_rate": 6.8855165427029345e-06, "loss": 1.642, "step": 32120 }, { "epoch": 0.9349274224045147, "grad_norm": 11.625, "learning_rate": 6.88357726103032e-06, "loss": 1.6688, "step": 32140 }, { "epoch": 0.9355092067370626, "grad_norm": 13.125, "learning_rate": 6.881637979357705e-06, "loss": 1.6207, "step": 32160 }, { "epoch": 0.9360909910696105, "grad_norm": 11.125, "learning_rate": 6.87969869768509e-06, "loss": 1.6017, "step": 32180 }, { "epoch": 0.9366727754021584, "grad_norm": 11.625, "learning_rate": 6.877759416012475e-06, "loss": 1.6321, "step": 32200 }, { "epoch": 0.9372545597347064, "grad_norm": 12.875, "learning_rate": 6.87582013433986e-06, "loss": 1.6127, "step": 32220 }, { "epoch": 0.9378363440672542, "grad_norm": 14.875, "learning_rate": 6.873880852667245e-06, "loss": 1.7372, "step": 32240 }, { "epoch": 0.9384181283998022, "grad_norm": 15.9375, "learning_rate": 6.87194157099463e-06, "loss": 1.6319, "step": 32260 }, { "epoch": 0.9389999127323502, "grad_norm": 13.625, "learning_rate": 6.870002289322015e-06, "loss": 1.5551, "step": 32280 }, { "epoch": 0.939581697064898, "grad_norm": 10.6875, "learning_rate": 6.8680630076494e-06, "loss": 1.6711, "step": 32300 }, { "epoch": 0.940163481397446, "grad_norm": 12.1875, "learning_rate": 6.8661237259767855e-06, "loss": 1.5174, "step": 32320 }, { "epoch": 0.9407452657299938, "grad_norm": 12.3125, "learning_rate": 6.8641844443041706e-06, "loss": 1.7003, "step": 32340 }, { "epoch": 0.9413270500625418, "grad_norm": 12.0625, "learning_rate": 6.862245162631556e-06, "loss": 1.6688, "step": 32360 }, { "epoch": 0.9419088343950898, "grad_norm": 11.8125, "learning_rate": 6.860305880958941e-06, "loss": 1.5936, "step": 32380 }, { "epoch": 0.9424906187276376, "grad_norm": 10.0625, "learning_rate": 6.858366599286326e-06, "loss": 1.67, "step": 32400 }, { "epoch": 0.9430724030601856, "grad_norm": 16.75, "learning_rate": 6.856427317613711e-06, "loss": 1.6339, "step": 32420 }, { "epoch": 0.9436541873927335, "grad_norm": 13.875, "learning_rate": 6.854488035941096e-06, "loss": 1.6749, "step": 32440 }, { "epoch": 0.9442359717252814, "grad_norm": 14.25, "learning_rate": 6.852548754268481e-06, "loss": 1.6833, "step": 32460 }, { "epoch": 0.9448177560578294, "grad_norm": 11.1875, "learning_rate": 6.850609472595866e-06, "loss": 1.6442, "step": 32480 }, { "epoch": 0.9453995403903773, "grad_norm": 12.875, "learning_rate": 6.848670190923251e-06, "loss": 1.5534, "step": 32500 }, { "epoch": 0.9459813247229252, "grad_norm": 12.5, "learning_rate": 6.8467309092506364e-06, "loss": 1.5668, "step": 32520 }, { "epoch": 0.9465631090554731, "grad_norm": 13.8125, "learning_rate": 6.8447916275780215e-06, "loss": 1.6358, "step": 32540 }, { "epoch": 0.9471448933880211, "grad_norm": 13.5, "learning_rate": 6.842852345905407e-06, "loss": 1.6178, "step": 32560 }, { "epoch": 0.947726677720569, "grad_norm": 12.9375, "learning_rate": 6.840913064232792e-06, "loss": 1.5604, "step": 32580 }, { "epoch": 0.9483084620531169, "grad_norm": 12.3125, "learning_rate": 6.838973782560177e-06, "loss": 1.6273, "step": 32600 }, { "epoch": 0.9488902463856649, "grad_norm": 11.25, "learning_rate": 6.837034500887562e-06, "loss": 1.629, "step": 32620 }, { "epoch": 0.9494720307182127, "grad_norm": 11.1875, "learning_rate": 6.835095219214947e-06, "loss": 1.621, "step": 32640 }, { "epoch": 0.9500538150507607, "grad_norm": 12.8125, "learning_rate": 6.833155937542332e-06, "loss": 1.6799, "step": 32660 }, { "epoch": 0.9506355993833087, "grad_norm": 11.25, "learning_rate": 6.831216655869717e-06, "loss": 1.5411, "step": 32680 }, { "epoch": 0.9512173837158565, "grad_norm": 12.1875, "learning_rate": 6.829277374197102e-06, "loss": 1.6182, "step": 32700 }, { "epoch": 0.9517991680484045, "grad_norm": 11.1875, "learning_rate": 6.8273380925244866e-06, "loss": 1.5881, "step": 32720 }, { "epoch": 0.9523809523809523, "grad_norm": 12.625, "learning_rate": 6.825398810851872e-06, "loss": 1.6739, "step": 32740 }, { "epoch": 0.9529627367135003, "grad_norm": 12.125, "learning_rate": 6.823459529179257e-06, "loss": 1.6474, "step": 32760 }, { "epoch": 0.9535445210460483, "grad_norm": 12.125, "learning_rate": 6.821520247506642e-06, "loss": 1.652, "step": 32780 }, { "epoch": 0.9541263053785961, "grad_norm": 12.4375, "learning_rate": 6.819580965834027e-06, "loss": 1.6006, "step": 32800 }, { "epoch": 0.9547080897111441, "grad_norm": 10.8125, "learning_rate": 6.817641684161412e-06, "loss": 1.5544, "step": 32820 }, { "epoch": 0.955289874043692, "grad_norm": 11.1875, "learning_rate": 6.815702402488797e-06, "loss": 1.6969, "step": 32840 }, { "epoch": 0.9558716583762399, "grad_norm": 12.25, "learning_rate": 6.813763120816182e-06, "loss": 1.5689, "step": 32860 }, { "epoch": 0.9564534427087878, "grad_norm": 11.9375, "learning_rate": 6.811823839143567e-06, "loss": 1.6163, "step": 32880 }, { "epoch": 0.9570352270413358, "grad_norm": 11.625, "learning_rate": 6.8098845574709524e-06, "loss": 1.5934, "step": 32900 }, { "epoch": 0.9576170113738837, "grad_norm": 11.25, "learning_rate": 6.8079452757983375e-06, "loss": 1.6005, "step": 32920 }, { "epoch": 0.9581987957064316, "grad_norm": 13.3125, "learning_rate": 6.806005994125723e-06, "loss": 1.6646, "step": 32940 }, { "epoch": 0.9587805800389796, "grad_norm": 12.4375, "learning_rate": 6.804066712453108e-06, "loss": 1.5621, "step": 32960 }, { "epoch": 0.9593623643715274, "grad_norm": 12.1875, "learning_rate": 6.802127430780493e-06, "loss": 1.5968, "step": 32980 }, { "epoch": 0.9599441487040754, "grad_norm": 13.25, "learning_rate": 6.800188149107878e-06, "loss": 1.5637, "step": 33000 }, { "epoch": 0.9605259330366234, "grad_norm": 16.125, "learning_rate": 6.798248867435263e-06, "loss": 1.5478, "step": 33020 }, { "epoch": 0.9611077173691712, "grad_norm": 14.5, "learning_rate": 6.796309585762648e-06, "loss": 1.4745, "step": 33040 }, { "epoch": 0.9616895017017192, "grad_norm": 11.0625, "learning_rate": 6.794370304090033e-06, "loss": 1.6644, "step": 33060 }, { "epoch": 0.9622712860342671, "grad_norm": 12.3125, "learning_rate": 6.792431022417418e-06, "loss": 1.6051, "step": 33080 }, { "epoch": 0.962853070366815, "grad_norm": 12.6875, "learning_rate": 6.790491740744803e-06, "loss": 1.5699, "step": 33100 }, { "epoch": 0.963434854699363, "grad_norm": 12.8125, "learning_rate": 6.7885524590721885e-06, "loss": 1.6223, "step": 33120 }, { "epoch": 0.9640166390319108, "grad_norm": 12.0, "learning_rate": 6.786613177399574e-06, "loss": 1.6234, "step": 33140 }, { "epoch": 0.9645984233644588, "grad_norm": 12.625, "learning_rate": 6.784673895726959e-06, "loss": 1.6072, "step": 33160 }, { "epoch": 0.9651802076970067, "grad_norm": 12.375, "learning_rate": 6.782734614054344e-06, "loss": 1.6224, "step": 33180 }, { "epoch": 0.9657619920295546, "grad_norm": 12.0625, "learning_rate": 6.780795332381729e-06, "loss": 1.6087, "step": 33200 }, { "epoch": 0.9663437763621026, "grad_norm": 12.9375, "learning_rate": 6.778856050709114e-06, "loss": 1.6673, "step": 33220 }, { "epoch": 0.9669255606946505, "grad_norm": 14.0, "learning_rate": 6.776916769036499e-06, "loss": 1.6716, "step": 33240 }, { "epoch": 0.9675073450271984, "grad_norm": 11.4375, "learning_rate": 6.774977487363884e-06, "loss": 1.6317, "step": 33260 }, { "epoch": 0.9680891293597463, "grad_norm": 11.875, "learning_rate": 6.773038205691269e-06, "loss": 1.6376, "step": 33280 }, { "epoch": 0.9686709136922943, "grad_norm": 13.75, "learning_rate": 6.771098924018654e-06, "loss": 1.5793, "step": 33300 }, { "epoch": 0.9692526980248422, "grad_norm": 11.6875, "learning_rate": 6.769159642346038e-06, "loss": 1.5495, "step": 33320 }, { "epoch": 0.9698344823573901, "grad_norm": 14.1875, "learning_rate": 6.767220360673423e-06, "loss": 1.6489, "step": 33340 }, { "epoch": 0.9704162666899381, "grad_norm": 13.5, "learning_rate": 6.765281079000808e-06, "loss": 1.6244, "step": 33360 }, { "epoch": 0.9709980510224859, "grad_norm": 12.1875, "learning_rate": 6.763341797328193e-06, "loss": 1.6047, "step": 33380 }, { "epoch": 0.9715798353550339, "grad_norm": 14.125, "learning_rate": 6.761402515655578e-06, "loss": 1.6346, "step": 33400 }, { "epoch": 0.9721616196875819, "grad_norm": 11.75, "learning_rate": 6.759463233982963e-06, "loss": 1.5593, "step": 33420 }, { "epoch": 0.9727434040201297, "grad_norm": 13.4375, "learning_rate": 6.757523952310348e-06, "loss": 1.4955, "step": 33440 }, { "epoch": 0.9733251883526777, "grad_norm": 13.1875, "learning_rate": 6.7555846706377335e-06, "loss": 1.583, "step": 33460 }, { "epoch": 0.9739069726852256, "grad_norm": 13.625, "learning_rate": 6.7536453889651186e-06, "loss": 1.6102, "step": 33480 }, { "epoch": 0.9744887570177735, "grad_norm": 12.125, "learning_rate": 6.751706107292504e-06, "loss": 1.6518, "step": 33500 }, { "epoch": 0.9750705413503215, "grad_norm": 12.625, "learning_rate": 6.749766825619889e-06, "loss": 1.6665, "step": 33520 }, { "epoch": 0.9756523256828694, "grad_norm": 12.9375, "learning_rate": 6.747827543947274e-06, "loss": 1.7361, "step": 33540 }, { "epoch": 0.9762341100154173, "grad_norm": 10.9375, "learning_rate": 6.745888262274659e-06, "loss": 1.6369, "step": 33560 }, { "epoch": 0.9768158943479652, "grad_norm": 16.75, "learning_rate": 6.743948980602044e-06, "loss": 1.5685, "step": 33580 }, { "epoch": 0.9773976786805131, "grad_norm": 12.9375, "learning_rate": 6.742009698929429e-06, "loss": 1.6494, "step": 33600 }, { "epoch": 0.977979463013061, "grad_norm": 11.625, "learning_rate": 6.740070417256814e-06, "loss": 1.6524, "step": 33620 }, { "epoch": 0.978561247345609, "grad_norm": 13.0625, "learning_rate": 6.738131135584199e-06, "loss": 1.5863, "step": 33640 }, { "epoch": 0.9791430316781569, "grad_norm": 11.25, "learning_rate": 6.7361918539115844e-06, "loss": 1.6313, "step": 33660 }, { "epoch": 0.9797248160107048, "grad_norm": 16.125, "learning_rate": 6.7342525722389695e-06, "loss": 1.6042, "step": 33680 }, { "epoch": 0.9803066003432528, "grad_norm": 13.3125, "learning_rate": 6.732313290566355e-06, "loss": 1.5964, "step": 33700 }, { "epoch": 0.9808883846758006, "grad_norm": 12.5625, "learning_rate": 6.73037400889374e-06, "loss": 1.5968, "step": 33720 }, { "epoch": 0.9814701690083486, "grad_norm": 13.4375, "learning_rate": 6.728434727221125e-06, "loss": 1.6331, "step": 33740 }, { "epoch": 0.9820519533408966, "grad_norm": 11.6875, "learning_rate": 6.72649544554851e-06, "loss": 1.6312, "step": 33760 }, { "epoch": 0.9826337376734444, "grad_norm": 13.375, "learning_rate": 6.724556163875895e-06, "loss": 1.6851, "step": 33780 }, { "epoch": 0.9832155220059924, "grad_norm": 14.25, "learning_rate": 6.72261688220328e-06, "loss": 1.6674, "step": 33800 }, { "epoch": 0.9837973063385403, "grad_norm": 12.75, "learning_rate": 6.720677600530665e-06, "loss": 1.5453, "step": 33820 }, { "epoch": 0.9843790906710882, "grad_norm": 12.25, "learning_rate": 6.71873831885805e-06, "loss": 1.7205, "step": 33840 }, { "epoch": 0.9849608750036362, "grad_norm": 11.4375, "learning_rate": 6.7167990371854354e-06, "loss": 1.672, "step": 33860 }, { "epoch": 0.9855426593361841, "grad_norm": 12.125, "learning_rate": 6.7148597555128205e-06, "loss": 1.5611, "step": 33880 }, { "epoch": 0.986124443668732, "grad_norm": 11.75, "learning_rate": 6.712920473840206e-06, "loss": 1.6928, "step": 33900 }, { "epoch": 0.9867062280012799, "grad_norm": 11.25, "learning_rate": 6.710981192167591e-06, "loss": 1.6471, "step": 33920 }, { "epoch": 0.9872880123338279, "grad_norm": 13.5, "learning_rate": 6.709041910494976e-06, "loss": 1.6177, "step": 33940 }, { "epoch": 0.9878697966663758, "grad_norm": 15.125, "learning_rate": 6.707102628822361e-06, "loss": 1.6374, "step": 33960 }, { "epoch": 0.9884515809989237, "grad_norm": 13.6875, "learning_rate": 6.705163347149746e-06, "loss": 1.5153, "step": 33980 }, { "epoch": 0.9890333653314716, "grad_norm": 13.0, "learning_rate": 6.703224065477131e-06, "loss": 1.6144, "step": 34000 }, { "epoch": 0.9896151496640195, "grad_norm": 11.875, "learning_rate": 6.701284783804516e-06, "loss": 1.5558, "step": 34020 }, { "epoch": 0.9901969339965675, "grad_norm": 10.6875, "learning_rate": 6.699345502131901e-06, "loss": 1.6159, "step": 34040 }, { "epoch": 0.9907787183291153, "grad_norm": 14.4375, "learning_rate": 6.697406220459286e-06, "loss": 1.6065, "step": 34060 }, { "epoch": 0.9913605026616633, "grad_norm": 13.5, "learning_rate": 6.6954669387866715e-06, "loss": 1.5929, "step": 34080 }, { "epoch": 0.9919422869942113, "grad_norm": 11.8125, "learning_rate": 6.693527657114056e-06, "loss": 1.6878, "step": 34100 }, { "epoch": 0.9925240713267591, "grad_norm": 12.3125, "learning_rate": 6.691588375441441e-06, "loss": 1.5968, "step": 34120 }, { "epoch": 0.9931058556593071, "grad_norm": 13.8125, "learning_rate": 6.689649093768826e-06, "loss": 1.6993, "step": 34140 }, { "epoch": 0.993687639991855, "grad_norm": 14.0, "learning_rate": 6.687709812096211e-06, "loss": 1.6291, "step": 34160 }, { "epoch": 0.9942694243244029, "grad_norm": 14.5, "learning_rate": 6.685770530423596e-06, "loss": 1.5992, "step": 34180 }, { "epoch": 0.9948512086569509, "grad_norm": 13.4375, "learning_rate": 6.683831248750981e-06, "loss": 1.6653, "step": 34200 }, { "epoch": 0.9954329929894988, "grad_norm": 12.75, "learning_rate": 6.681891967078366e-06, "loss": 1.6667, "step": 34220 }, { "epoch": 0.9960147773220467, "grad_norm": 13.1875, "learning_rate": 6.6799526854057514e-06, "loss": 1.6231, "step": 34240 }, { "epoch": 0.9965965616545946, "grad_norm": 11.1875, "learning_rate": 6.6780134037331365e-06, "loss": 1.7432, "step": 34260 }, { "epoch": 0.9971783459871426, "grad_norm": 13.875, "learning_rate": 6.676074122060522e-06, "loss": 1.6409, "step": 34280 }, { "epoch": 0.9977601303196905, "grad_norm": 12.8125, "learning_rate": 6.674134840387907e-06, "loss": 1.6375, "step": 34300 }, { "epoch": 0.9983419146522384, "grad_norm": 12.0, "learning_rate": 6.672195558715292e-06, "loss": 1.6822, "step": 34320 }, { "epoch": 0.9989236989847864, "grad_norm": 15.5625, "learning_rate": 6.670256277042677e-06, "loss": 1.5642, "step": 34340 }, { "epoch": 0.9995054833173342, "grad_norm": 9.75, "learning_rate": 6.668316995370062e-06, "loss": 1.7238, "step": 34360 }, { "epoch": 1.0000872676498822, "grad_norm": 10.125, "learning_rate": 6.666377713697447e-06, "loss": 1.6699, "step": 34380 }, { "epoch": 1.0006690519824302, "grad_norm": 12.5, "learning_rate": 6.664438432024832e-06, "loss": 1.5624, "step": 34400 }, { "epoch": 1.0012508363149781, "grad_norm": 10.5, "learning_rate": 6.662499150352217e-06, "loss": 1.5326, "step": 34420 }, { "epoch": 1.0018326206475259, "grad_norm": 14.5, "learning_rate": 6.660559868679602e-06, "loss": 1.5176, "step": 34440 }, { "epoch": 1.0024144049800738, "grad_norm": 10.6875, "learning_rate": 6.6586205870069875e-06, "loss": 1.518, "step": 34460 }, { "epoch": 1.0029961893126218, "grad_norm": 13.5625, "learning_rate": 6.656681305334373e-06, "loss": 1.519, "step": 34480 }, { "epoch": 1.0035779736451698, "grad_norm": 11.625, "learning_rate": 6.654742023661758e-06, "loss": 1.5261, "step": 34500 }, { "epoch": 1.0041597579777177, "grad_norm": 18.625, "learning_rate": 6.652802741989143e-06, "loss": 1.5199, "step": 34520 }, { "epoch": 1.0047415423102657, "grad_norm": 14.0625, "learning_rate": 6.650863460316528e-06, "loss": 1.4781, "step": 34540 }, { "epoch": 1.0053233266428134, "grad_norm": 12.5625, "learning_rate": 6.648924178643913e-06, "loss": 1.5739, "step": 34560 }, { "epoch": 1.0059051109753614, "grad_norm": 13.9375, "learning_rate": 6.646984896971298e-06, "loss": 1.4837, "step": 34580 }, { "epoch": 1.0064868953079094, "grad_norm": 12.5, "learning_rate": 6.645045615298683e-06, "loss": 1.546, "step": 34600 }, { "epoch": 1.0070686796404573, "grad_norm": 14.9375, "learning_rate": 6.643106333626068e-06, "loss": 1.4559, "step": 34620 }, { "epoch": 1.0076504639730053, "grad_norm": 10.75, "learning_rate": 6.641167051953453e-06, "loss": 1.5901, "step": 34640 }, { "epoch": 1.008232248305553, "grad_norm": 13.375, "learning_rate": 6.6392277702808385e-06, "loss": 1.5853, "step": 34660 }, { "epoch": 1.008814032638101, "grad_norm": 10.75, "learning_rate": 6.6372884886082236e-06, "loss": 1.5529, "step": 34680 }, { "epoch": 1.009395816970649, "grad_norm": 13.4375, "learning_rate": 6.635349206935609e-06, "loss": 1.5142, "step": 34700 }, { "epoch": 1.009977601303197, "grad_norm": 16.375, "learning_rate": 6.633409925262994e-06, "loss": 1.5687, "step": 34720 }, { "epoch": 1.0105593856357449, "grad_norm": 15.0, "learning_rate": 6.631470643590379e-06, "loss": 1.5669, "step": 34740 }, { "epoch": 1.0111411699682928, "grad_norm": 17.625, "learning_rate": 6.629531361917764e-06, "loss": 1.4651, "step": 34760 }, { "epoch": 1.0117229543008406, "grad_norm": 11.125, "learning_rate": 6.627592080245149e-06, "loss": 1.4856, "step": 34780 }, { "epoch": 1.0123047386333885, "grad_norm": 11.8125, "learning_rate": 6.625652798572534e-06, "loss": 1.6137, "step": 34800 }, { "epoch": 1.0128865229659365, "grad_norm": 12.8125, "learning_rate": 6.623713516899919e-06, "loss": 1.5333, "step": 34820 }, { "epoch": 1.0134683072984845, "grad_norm": 13.1875, "learning_rate": 6.621774235227304e-06, "loss": 1.4606, "step": 34840 }, { "epoch": 1.0140500916310324, "grad_norm": 13.9375, "learning_rate": 6.6198349535546895e-06, "loss": 1.4368, "step": 34860 }, { "epoch": 1.0146318759635804, "grad_norm": 12.5625, "learning_rate": 6.6178956718820745e-06, "loss": 1.4579, "step": 34880 }, { "epoch": 1.0152136602961281, "grad_norm": 11.625, "learning_rate": 6.61595639020946e-06, "loss": 1.4373, "step": 34900 }, { "epoch": 1.015795444628676, "grad_norm": 14.9375, "learning_rate": 6.614017108536844e-06, "loss": 1.5626, "step": 34920 }, { "epoch": 1.016377228961224, "grad_norm": 12.25, "learning_rate": 6.612077826864229e-06, "loss": 1.5653, "step": 34940 }, { "epoch": 1.016959013293772, "grad_norm": 13.0, "learning_rate": 6.610138545191614e-06, "loss": 1.54, "step": 34960 }, { "epoch": 1.01754079762632, "grad_norm": 11.375, "learning_rate": 6.608199263518999e-06, "loss": 1.4087, "step": 34980 }, { "epoch": 1.018122581958868, "grad_norm": 12.5, "learning_rate": 6.606259981846384e-06, "loss": 1.5305, "step": 35000 }, { "epoch": 1.0187043662914157, "grad_norm": 12.75, "learning_rate": 6.604320700173769e-06, "loss": 1.5814, "step": 35020 }, { "epoch": 1.0192861506239637, "grad_norm": 11.3125, "learning_rate": 6.6023814185011545e-06, "loss": 1.4229, "step": 35040 }, { "epoch": 1.0198679349565116, "grad_norm": 11.625, "learning_rate": 6.60044213682854e-06, "loss": 1.5016, "step": 35060 }, { "epoch": 1.0204497192890596, "grad_norm": 13.125, "learning_rate": 6.598502855155925e-06, "loss": 1.4902, "step": 35080 }, { "epoch": 1.0210315036216076, "grad_norm": 11.4375, "learning_rate": 6.59656357348331e-06, "loss": 1.5648, "step": 35100 }, { "epoch": 1.0216132879541553, "grad_norm": 13.0, "learning_rate": 6.594624291810695e-06, "loss": 1.5088, "step": 35120 }, { "epoch": 1.0221950722867033, "grad_norm": 13.125, "learning_rate": 6.59268501013808e-06, "loss": 1.5048, "step": 35140 }, { "epoch": 1.0227768566192512, "grad_norm": 12.8125, "learning_rate": 6.590745728465465e-06, "loss": 1.5046, "step": 35160 }, { "epoch": 1.0233586409517992, "grad_norm": 12.375, "learning_rate": 6.58880644679285e-06, "loss": 1.5097, "step": 35180 }, { "epoch": 1.0239404252843471, "grad_norm": 12.625, "learning_rate": 6.586867165120235e-06, "loss": 1.5104, "step": 35200 }, { "epoch": 1.024522209616895, "grad_norm": 14.25, "learning_rate": 6.58492788344762e-06, "loss": 1.5456, "step": 35220 }, { "epoch": 1.0251039939494428, "grad_norm": 12.625, "learning_rate": 6.5829886017750055e-06, "loss": 1.5434, "step": 35240 }, { "epoch": 1.0256857782819908, "grad_norm": 12.625, "learning_rate": 6.5810493201023906e-06, "loss": 1.6952, "step": 35260 }, { "epoch": 1.0262675626145388, "grad_norm": 12.5, "learning_rate": 6.579110038429776e-06, "loss": 1.5075, "step": 35280 }, { "epoch": 1.0268493469470867, "grad_norm": 14.0, "learning_rate": 6.577170756757161e-06, "loss": 1.5463, "step": 35300 }, { "epoch": 1.0274311312796347, "grad_norm": 14.1875, "learning_rate": 6.575231475084546e-06, "loss": 1.5216, "step": 35320 }, { "epoch": 1.0280129156121827, "grad_norm": 11.8125, "learning_rate": 6.573292193411931e-06, "loss": 1.4992, "step": 35340 }, { "epoch": 1.0285946999447304, "grad_norm": 14.125, "learning_rate": 6.571352911739316e-06, "loss": 1.5322, "step": 35360 }, { "epoch": 1.0291764842772784, "grad_norm": 12.75, "learning_rate": 6.569413630066701e-06, "loss": 1.5707, "step": 35380 }, { "epoch": 1.0297582686098263, "grad_norm": 12.875, "learning_rate": 6.567474348394086e-06, "loss": 1.428, "step": 35400 }, { "epoch": 1.0303400529423743, "grad_norm": 13.4375, "learning_rate": 6.565535066721471e-06, "loss": 1.5032, "step": 35420 }, { "epoch": 1.0309218372749223, "grad_norm": 14.8125, "learning_rate": 6.5635957850488564e-06, "loss": 1.5676, "step": 35440 }, { "epoch": 1.0315036216074702, "grad_norm": 13.375, "learning_rate": 6.5616565033762415e-06, "loss": 1.5432, "step": 35460 }, { "epoch": 1.032085405940018, "grad_norm": 11.6875, "learning_rate": 6.559717221703627e-06, "loss": 1.5117, "step": 35480 }, { "epoch": 1.032667190272566, "grad_norm": 12.625, "learning_rate": 6.557777940031012e-06, "loss": 1.4937, "step": 35500 }, { "epoch": 1.033248974605114, "grad_norm": 11.75, "learning_rate": 6.555838658358397e-06, "loss": 1.5287, "step": 35520 }, { "epoch": 1.0338307589376619, "grad_norm": 12.125, "learning_rate": 6.553899376685782e-06, "loss": 1.4799, "step": 35540 }, { "epoch": 1.0344125432702098, "grad_norm": 13.0, "learning_rate": 6.551960095013167e-06, "loss": 1.5311, "step": 35560 }, { "epoch": 1.0349943276027576, "grad_norm": 13.625, "learning_rate": 6.550020813340552e-06, "loss": 1.4129, "step": 35580 }, { "epoch": 1.0355761119353055, "grad_norm": 10.625, "learning_rate": 6.548081531667937e-06, "loss": 1.4979, "step": 35600 }, { "epoch": 1.0361578962678535, "grad_norm": 13.25, "learning_rate": 6.546142249995322e-06, "loss": 1.5552, "step": 35620 }, { "epoch": 1.0367396806004014, "grad_norm": 12.375, "learning_rate": 6.544202968322707e-06, "loss": 1.5192, "step": 35640 }, { "epoch": 1.0373214649329494, "grad_norm": 13.125, "learning_rate": 6.5422636866500925e-06, "loss": 1.5752, "step": 35660 }, { "epoch": 1.0379032492654974, "grad_norm": 14.125, "learning_rate": 6.540324404977478e-06, "loss": 1.4708, "step": 35680 }, { "epoch": 1.0384850335980451, "grad_norm": 12.625, "learning_rate": 6.538385123304863e-06, "loss": 1.5739, "step": 35700 }, { "epoch": 1.039066817930593, "grad_norm": 12.1875, "learning_rate": 6.536445841632248e-06, "loss": 1.5387, "step": 35720 }, { "epoch": 1.039648602263141, "grad_norm": 15.1875, "learning_rate": 6.534506559959632e-06, "loss": 1.4135, "step": 35740 }, { "epoch": 1.040230386595689, "grad_norm": 12.5625, "learning_rate": 6.532567278287017e-06, "loss": 1.5351, "step": 35760 }, { "epoch": 1.040812170928237, "grad_norm": 13.9375, "learning_rate": 6.530627996614402e-06, "loss": 1.5075, "step": 35780 }, { "epoch": 1.041393955260785, "grad_norm": 11.8125, "learning_rate": 6.528688714941787e-06, "loss": 1.5084, "step": 35800 }, { "epoch": 1.0419757395933327, "grad_norm": 11.25, "learning_rate": 6.5267494332691724e-06, "loss": 1.5374, "step": 35820 }, { "epoch": 1.0425575239258806, "grad_norm": 13.4375, "learning_rate": 6.5248101515965575e-06, "loss": 1.5538, "step": 35840 }, { "epoch": 1.0431393082584286, "grad_norm": 13.3125, "learning_rate": 6.522870869923943e-06, "loss": 1.5906, "step": 35860 }, { "epoch": 1.0437210925909766, "grad_norm": 9.5625, "learning_rate": 6.520931588251328e-06, "loss": 1.4507, "step": 35880 }, { "epoch": 1.0443028769235245, "grad_norm": 12.75, "learning_rate": 6.518992306578713e-06, "loss": 1.4307, "step": 35900 }, { "epoch": 1.0448846612560723, "grad_norm": 11.375, "learning_rate": 6.517053024906098e-06, "loss": 1.6122, "step": 35920 }, { "epoch": 1.0454664455886202, "grad_norm": 13.25, "learning_rate": 6.515113743233483e-06, "loss": 1.5159, "step": 35940 }, { "epoch": 1.0460482299211682, "grad_norm": 13.4375, "learning_rate": 6.513174461560868e-06, "loss": 1.4978, "step": 35960 }, { "epoch": 1.0466300142537162, "grad_norm": 10.9375, "learning_rate": 6.511235179888253e-06, "loss": 1.547, "step": 35980 }, { "epoch": 1.0472117985862641, "grad_norm": 14.0, "learning_rate": 6.509295898215638e-06, "loss": 1.5661, "step": 36000 }, { "epoch": 1.047793582918812, "grad_norm": 13.3125, "learning_rate": 6.507356616543023e-06, "loss": 1.5157, "step": 36020 }, { "epoch": 1.0483753672513598, "grad_norm": 11.5625, "learning_rate": 6.5054173348704085e-06, "loss": 1.5278, "step": 36040 }, { "epoch": 1.0489571515839078, "grad_norm": 11.125, "learning_rate": 6.503478053197794e-06, "loss": 1.4772, "step": 36060 }, { "epoch": 1.0495389359164558, "grad_norm": 14.75, "learning_rate": 6.501538771525179e-06, "loss": 1.5065, "step": 36080 }, { "epoch": 1.0501207202490037, "grad_norm": 11.625, "learning_rate": 6.499599489852564e-06, "loss": 1.49, "step": 36100 }, { "epoch": 1.0507025045815517, "grad_norm": 12.8125, "learning_rate": 6.497660208179949e-06, "loss": 1.5775, "step": 36120 }, { "epoch": 1.0512842889140996, "grad_norm": 11.8125, "learning_rate": 6.495720926507334e-06, "loss": 1.5412, "step": 36140 }, { "epoch": 1.0518660732466474, "grad_norm": 15.125, "learning_rate": 6.493781644834719e-06, "loss": 1.5165, "step": 36160 }, { "epoch": 1.0524478575791953, "grad_norm": 13.875, "learning_rate": 6.491842363162104e-06, "loss": 1.5471, "step": 36180 }, { "epoch": 1.0530296419117433, "grad_norm": 15.5, "learning_rate": 6.489903081489489e-06, "loss": 1.4839, "step": 36200 }, { "epoch": 1.0536114262442913, "grad_norm": 13.875, "learning_rate": 6.487963799816874e-06, "loss": 1.5373, "step": 36220 }, { "epoch": 1.0541932105768392, "grad_norm": 12.8125, "learning_rate": 6.4860245181442595e-06, "loss": 1.5859, "step": 36240 }, { "epoch": 1.054774994909387, "grad_norm": 14.5625, "learning_rate": 6.484085236471645e-06, "loss": 1.5399, "step": 36260 }, { "epoch": 1.055356779241935, "grad_norm": 12.3125, "learning_rate": 6.48214595479903e-06, "loss": 1.5187, "step": 36280 }, { "epoch": 1.055938563574483, "grad_norm": 11.1875, "learning_rate": 6.480206673126415e-06, "loss": 1.503, "step": 36300 }, { "epoch": 1.0565203479070309, "grad_norm": 12.875, "learning_rate": 6.4782673914538e-06, "loss": 1.5074, "step": 36320 }, { "epoch": 1.0571021322395788, "grad_norm": 13.875, "learning_rate": 6.476328109781185e-06, "loss": 1.5128, "step": 36340 }, { "epoch": 1.0576839165721268, "grad_norm": 12.0, "learning_rate": 6.47438882810857e-06, "loss": 1.5213, "step": 36360 }, { "epoch": 1.0582657009046745, "grad_norm": 11.6875, "learning_rate": 6.472449546435955e-06, "loss": 1.5648, "step": 36380 }, { "epoch": 1.0588474852372225, "grad_norm": 13.5625, "learning_rate": 6.47051026476334e-06, "loss": 1.5782, "step": 36400 }, { "epoch": 1.0594292695697705, "grad_norm": 14.625, "learning_rate": 6.468570983090725e-06, "loss": 1.6054, "step": 36420 }, { "epoch": 1.0600110539023184, "grad_norm": 14.8125, "learning_rate": 6.4666317014181105e-06, "loss": 1.5262, "step": 36440 }, { "epoch": 1.0605928382348664, "grad_norm": 15.6875, "learning_rate": 6.4646924197454956e-06, "loss": 1.5041, "step": 36460 }, { "epoch": 1.0611746225674143, "grad_norm": 12.0, "learning_rate": 6.462753138072881e-06, "loss": 1.4741, "step": 36480 }, { "epoch": 1.061756406899962, "grad_norm": 13.6875, "learning_rate": 6.460813856400266e-06, "loss": 1.4637, "step": 36500 }, { "epoch": 1.06233819123251, "grad_norm": 12.25, "learning_rate": 6.458874574727651e-06, "loss": 1.5187, "step": 36520 }, { "epoch": 1.062919975565058, "grad_norm": 12.9375, "learning_rate": 6.456935293055036e-06, "loss": 1.4297, "step": 36540 }, { "epoch": 1.063501759897606, "grad_norm": 10.25, "learning_rate": 6.45499601138242e-06, "loss": 1.499, "step": 36560 }, { "epoch": 1.064083544230154, "grad_norm": 11.125, "learning_rate": 6.453056729709805e-06, "loss": 1.5329, "step": 36580 }, { "epoch": 1.064665328562702, "grad_norm": 12.0625, "learning_rate": 6.45111744803719e-06, "loss": 1.589, "step": 36600 }, { "epoch": 1.0652471128952496, "grad_norm": 12.875, "learning_rate": 6.4491781663645755e-06, "loss": 1.4859, "step": 36620 }, { "epoch": 1.0658288972277976, "grad_norm": 12.5625, "learning_rate": 6.447238884691961e-06, "loss": 1.4984, "step": 36640 }, { "epoch": 1.0664106815603456, "grad_norm": 12.375, "learning_rate": 6.445299603019346e-06, "loss": 1.5674, "step": 36660 }, { "epoch": 1.0669924658928935, "grad_norm": 12.3125, "learning_rate": 6.443360321346731e-06, "loss": 1.5434, "step": 36680 }, { "epoch": 1.0675742502254415, "grad_norm": 10.4375, "learning_rate": 6.441421039674116e-06, "loss": 1.4657, "step": 36700 }, { "epoch": 1.0681560345579895, "grad_norm": 11.9375, "learning_rate": 6.439481758001501e-06, "loss": 1.4707, "step": 36720 }, { "epoch": 1.0687378188905372, "grad_norm": 14.6875, "learning_rate": 6.437542476328886e-06, "loss": 1.5764, "step": 36740 }, { "epoch": 1.0693196032230852, "grad_norm": 12.6875, "learning_rate": 6.435603194656271e-06, "loss": 1.525, "step": 36760 }, { "epoch": 1.0699013875556331, "grad_norm": 16.125, "learning_rate": 6.433663912983656e-06, "loss": 1.5138, "step": 36780 }, { "epoch": 1.070483171888181, "grad_norm": 12.0625, "learning_rate": 6.431724631311041e-06, "loss": 1.506, "step": 36800 }, { "epoch": 1.071064956220729, "grad_norm": 15.5625, "learning_rate": 6.4297853496384265e-06, "loss": 1.5367, "step": 36820 }, { "epoch": 1.0716467405532768, "grad_norm": 15.4375, "learning_rate": 6.4278460679658116e-06, "loss": 1.4997, "step": 36840 }, { "epoch": 1.0722285248858248, "grad_norm": 12.5, "learning_rate": 6.425906786293197e-06, "loss": 1.5785, "step": 36860 }, { "epoch": 1.0728103092183727, "grad_norm": 14.0625, "learning_rate": 6.423967504620582e-06, "loss": 1.4386, "step": 36880 }, { "epoch": 1.0733920935509207, "grad_norm": 14.75, "learning_rate": 6.422028222947967e-06, "loss": 1.4992, "step": 36900 }, { "epoch": 1.0739738778834687, "grad_norm": 11.8125, "learning_rate": 6.420088941275352e-06, "loss": 1.5329, "step": 36920 }, { "epoch": 1.0745556622160166, "grad_norm": 11.125, "learning_rate": 6.418149659602737e-06, "loss": 1.5295, "step": 36940 }, { "epoch": 1.0751374465485644, "grad_norm": 11.625, "learning_rate": 6.416210377930122e-06, "loss": 1.6297, "step": 36960 }, { "epoch": 1.0757192308811123, "grad_norm": 14.1875, "learning_rate": 6.414271096257507e-06, "loss": 1.5128, "step": 36980 }, { "epoch": 1.0763010152136603, "grad_norm": 13.625, "learning_rate": 6.412331814584892e-06, "loss": 1.4886, "step": 37000 }, { "epoch": 1.0768827995462082, "grad_norm": 14.125, "learning_rate": 6.4103925329122774e-06, "loss": 1.5572, "step": 37020 }, { "epoch": 1.0774645838787562, "grad_norm": 11.875, "learning_rate": 6.4084532512396625e-06, "loss": 1.4906, "step": 37040 }, { "epoch": 1.0780463682113042, "grad_norm": 12.8125, "learning_rate": 6.406513969567048e-06, "loss": 1.5079, "step": 37060 }, { "epoch": 1.078628152543852, "grad_norm": 11.5, "learning_rate": 6.404574687894433e-06, "loss": 1.5033, "step": 37080 }, { "epoch": 1.0792099368763999, "grad_norm": 15.875, "learning_rate": 6.402635406221818e-06, "loss": 1.5184, "step": 37100 }, { "epoch": 1.0797917212089478, "grad_norm": 13.75, "learning_rate": 6.400696124549203e-06, "loss": 1.5089, "step": 37120 }, { "epoch": 1.0803735055414958, "grad_norm": 13.3125, "learning_rate": 6.398756842876588e-06, "loss": 1.5207, "step": 37140 }, { "epoch": 1.0809552898740438, "grad_norm": 13.25, "learning_rate": 6.396817561203973e-06, "loss": 1.5109, "step": 37160 }, { "epoch": 1.0815370742065915, "grad_norm": 12.5, "learning_rate": 6.394878279531358e-06, "loss": 1.5187, "step": 37180 }, { "epoch": 1.0821188585391395, "grad_norm": 13.9375, "learning_rate": 6.392938997858743e-06, "loss": 1.494, "step": 37200 }, { "epoch": 1.0827006428716874, "grad_norm": 11.875, "learning_rate": 6.390999716186128e-06, "loss": 1.4747, "step": 37220 }, { "epoch": 1.0832824272042354, "grad_norm": 10.8125, "learning_rate": 6.3890604345135135e-06, "loss": 1.4979, "step": 37240 }, { "epoch": 1.0838642115367834, "grad_norm": 14.5, "learning_rate": 6.387121152840899e-06, "loss": 1.5181, "step": 37260 }, { "epoch": 1.0844459958693313, "grad_norm": 19.75, "learning_rate": 6.385181871168284e-06, "loss": 1.5811, "step": 37280 }, { "epoch": 1.085027780201879, "grad_norm": 10.9375, "learning_rate": 6.383242589495669e-06, "loss": 1.5239, "step": 37300 }, { "epoch": 1.085609564534427, "grad_norm": 14.1875, "learning_rate": 6.381303307823054e-06, "loss": 1.4841, "step": 37320 }, { "epoch": 1.086191348866975, "grad_norm": 12.5, "learning_rate": 6.379364026150439e-06, "loss": 1.539, "step": 37340 }, { "epoch": 1.086773133199523, "grad_norm": 16.0, "learning_rate": 6.377424744477823e-06, "loss": 1.495, "step": 37360 }, { "epoch": 1.087354917532071, "grad_norm": 13.8125, "learning_rate": 6.375485462805208e-06, "loss": 1.5655, "step": 37380 }, { "epoch": 1.0879367018646189, "grad_norm": 13.5, "learning_rate": 6.3735461811325934e-06, "loss": 1.5101, "step": 37400 }, { "epoch": 1.0885184861971666, "grad_norm": 12.5, "learning_rate": 6.3716068994599785e-06, "loss": 1.446, "step": 37420 }, { "epoch": 1.0891002705297146, "grad_norm": 9.9375, "learning_rate": 6.369667617787364e-06, "loss": 1.4292, "step": 37440 }, { "epoch": 1.0896820548622626, "grad_norm": 12.125, "learning_rate": 6.367728336114749e-06, "loss": 1.4819, "step": 37460 }, { "epoch": 1.0902638391948105, "grad_norm": 13.75, "learning_rate": 6.365789054442134e-06, "loss": 1.4845, "step": 37480 }, { "epoch": 1.0908456235273585, "grad_norm": 12.8125, "learning_rate": 6.363849772769519e-06, "loss": 1.5017, "step": 37500 }, { "epoch": 1.0914274078599062, "grad_norm": 13.0, "learning_rate": 6.361910491096904e-06, "loss": 1.4634, "step": 37520 }, { "epoch": 1.0920091921924542, "grad_norm": 12.0, "learning_rate": 6.359971209424289e-06, "loss": 1.4176, "step": 37540 }, { "epoch": 1.0925909765250021, "grad_norm": 10.5, "learning_rate": 6.358031927751673e-06, "loss": 1.5057, "step": 37560 }, { "epoch": 1.09317276085755, "grad_norm": 14.25, "learning_rate": 6.3560926460790585e-06, "loss": 1.4842, "step": 37580 }, { "epoch": 1.093754545190098, "grad_norm": 12.1875, "learning_rate": 6.3541533644064436e-06, "loss": 1.5763, "step": 37600 }, { "epoch": 1.094336329522646, "grad_norm": 14.6875, "learning_rate": 6.352214082733829e-06, "loss": 1.6831, "step": 37620 }, { "epoch": 1.094918113855194, "grad_norm": 11.375, "learning_rate": 6.350274801061214e-06, "loss": 1.5598, "step": 37640 }, { "epoch": 1.0954998981877417, "grad_norm": 13.375, "learning_rate": 6.348335519388599e-06, "loss": 1.4496, "step": 37660 }, { "epoch": 1.0960816825202897, "grad_norm": 13.1875, "learning_rate": 6.346396237715984e-06, "loss": 1.5174, "step": 37680 }, { "epoch": 1.0966634668528377, "grad_norm": 14.375, "learning_rate": 6.344456956043369e-06, "loss": 1.5247, "step": 37700 }, { "epoch": 1.0972452511853856, "grad_norm": 13.5, "learning_rate": 6.342517674370754e-06, "loss": 1.5636, "step": 37720 }, { "epoch": 1.0978270355179336, "grad_norm": 14.8125, "learning_rate": 6.340578392698139e-06, "loss": 1.5537, "step": 37740 }, { "epoch": 1.0984088198504813, "grad_norm": 13.5, "learning_rate": 6.338639111025524e-06, "loss": 1.5261, "step": 37760 }, { "epoch": 1.0989906041830293, "grad_norm": 12.0625, "learning_rate": 6.3366998293529094e-06, "loss": 1.5312, "step": 37780 }, { "epoch": 1.0995723885155773, "grad_norm": 11.9375, "learning_rate": 6.3347605476802945e-06, "loss": 1.5345, "step": 37800 }, { "epoch": 1.1001541728481252, "grad_norm": 12.6875, "learning_rate": 6.33282126600768e-06, "loss": 1.6277, "step": 37820 }, { "epoch": 1.1007359571806732, "grad_norm": 13.625, "learning_rate": 6.330881984335065e-06, "loss": 1.5504, "step": 37840 }, { "epoch": 1.1013177415132211, "grad_norm": 11.5625, "learning_rate": 6.32894270266245e-06, "loss": 1.4967, "step": 37860 }, { "epoch": 1.101899525845769, "grad_norm": 12.125, "learning_rate": 6.327003420989835e-06, "loss": 1.5639, "step": 37880 }, { "epoch": 1.1024813101783169, "grad_norm": 14.375, "learning_rate": 6.32506413931722e-06, "loss": 1.5901, "step": 37900 }, { "epoch": 1.1030630945108648, "grad_norm": 13.375, "learning_rate": 6.323124857644605e-06, "loss": 1.5309, "step": 37920 }, { "epoch": 1.1036448788434128, "grad_norm": 13.5, "learning_rate": 6.321185575971989e-06, "loss": 1.5142, "step": 37940 }, { "epoch": 1.1042266631759607, "grad_norm": 12.875, "learning_rate": 6.3192462942993745e-06, "loss": 1.5386, "step": 37960 }, { "epoch": 1.1048084475085087, "grad_norm": 12.5, "learning_rate": 6.3173070126267596e-06, "loss": 1.5329, "step": 37980 }, { "epoch": 1.1053902318410564, "grad_norm": 12.125, "learning_rate": 6.315367730954145e-06, "loss": 1.5415, "step": 38000 }, { "epoch": 1.1059720161736044, "grad_norm": 13.9375, "learning_rate": 6.31342844928153e-06, "loss": 1.5508, "step": 38020 }, { "epoch": 1.1065538005061524, "grad_norm": 16.0, "learning_rate": 6.311489167608915e-06, "loss": 1.5365, "step": 38040 }, { "epoch": 1.1071355848387003, "grad_norm": 12.0, "learning_rate": 6.3095498859363e-06, "loss": 1.5612, "step": 38060 }, { "epoch": 1.1077173691712483, "grad_norm": 12.0, "learning_rate": 6.307610604263685e-06, "loss": 1.46, "step": 38080 }, { "epoch": 1.108299153503796, "grad_norm": 12.4375, "learning_rate": 6.30567132259107e-06, "loss": 1.5787, "step": 38100 }, { "epoch": 1.108880937836344, "grad_norm": 13.25, "learning_rate": 6.303732040918455e-06, "loss": 1.543, "step": 38120 }, { "epoch": 1.109462722168892, "grad_norm": 12.375, "learning_rate": 6.30179275924584e-06, "loss": 1.4542, "step": 38140 }, { "epoch": 1.11004450650144, "grad_norm": 12.625, "learning_rate": 6.2998534775732255e-06, "loss": 1.4819, "step": 38160 }, { "epoch": 1.110626290833988, "grad_norm": 13.0, "learning_rate": 6.2979141959006105e-06, "loss": 1.5394, "step": 38180 }, { "epoch": 1.1112080751665359, "grad_norm": 12.9375, "learning_rate": 6.295974914227996e-06, "loss": 1.6128, "step": 38200 }, { "epoch": 1.1117898594990836, "grad_norm": 12.0, "learning_rate": 6.294035632555381e-06, "loss": 1.4606, "step": 38220 }, { "epoch": 1.1123716438316316, "grad_norm": 14.75, "learning_rate": 6.292096350882766e-06, "loss": 1.4817, "step": 38240 }, { "epoch": 1.1129534281641795, "grad_norm": 13.75, "learning_rate": 6.290157069210151e-06, "loss": 1.5162, "step": 38260 }, { "epoch": 1.1135352124967275, "grad_norm": 12.4375, "learning_rate": 6.288217787537536e-06, "loss": 1.4722, "step": 38280 }, { "epoch": 1.1141169968292755, "grad_norm": 11.75, "learning_rate": 6.286278505864921e-06, "loss": 1.4585, "step": 38300 }, { "epoch": 1.1146987811618234, "grad_norm": 11.9375, "learning_rate": 6.284339224192306e-06, "loss": 1.5375, "step": 38320 }, { "epoch": 1.1152805654943712, "grad_norm": 13.1875, "learning_rate": 6.282399942519691e-06, "loss": 1.5263, "step": 38340 }, { "epoch": 1.1158623498269191, "grad_norm": 11.5625, "learning_rate": 6.2804606608470764e-06, "loss": 1.5259, "step": 38360 }, { "epoch": 1.116444134159467, "grad_norm": 12.5, "learning_rate": 6.2785213791744615e-06, "loss": 1.4978, "step": 38380 }, { "epoch": 1.117025918492015, "grad_norm": 13.4375, "learning_rate": 6.276582097501847e-06, "loss": 1.4903, "step": 38400 }, { "epoch": 1.117607702824563, "grad_norm": 12.0, "learning_rate": 6.274642815829232e-06, "loss": 1.5991, "step": 38420 }, { "epoch": 1.1181894871571108, "grad_norm": 16.625, "learning_rate": 6.272703534156617e-06, "loss": 1.5308, "step": 38440 }, { "epoch": 1.1187712714896587, "grad_norm": 17.125, "learning_rate": 6.270764252484002e-06, "loss": 1.4371, "step": 38460 }, { "epoch": 1.1193530558222067, "grad_norm": 13.25, "learning_rate": 6.268824970811387e-06, "loss": 1.5051, "step": 38480 }, { "epoch": 1.1199348401547546, "grad_norm": 12.0, "learning_rate": 6.266885689138772e-06, "loss": 1.5281, "step": 38500 }, { "epoch": 1.1205166244873026, "grad_norm": 14.125, "learning_rate": 6.264946407466157e-06, "loss": 1.6, "step": 38520 }, { "epoch": 1.1210984088198506, "grad_norm": 12.5, "learning_rate": 6.263007125793542e-06, "loss": 1.4649, "step": 38540 }, { "epoch": 1.1216801931523983, "grad_norm": 15.5625, "learning_rate": 6.261067844120927e-06, "loss": 1.5477, "step": 38560 }, { "epoch": 1.1222619774849463, "grad_norm": 11.8125, "learning_rate": 6.2591285624483125e-06, "loss": 1.5681, "step": 38580 }, { "epoch": 1.1228437618174942, "grad_norm": 12.0625, "learning_rate": 6.257189280775698e-06, "loss": 1.4782, "step": 38600 }, { "epoch": 1.1234255461500422, "grad_norm": 14.625, "learning_rate": 6.255249999103083e-06, "loss": 1.5647, "step": 38620 }, { "epoch": 1.1240073304825902, "grad_norm": 12.6875, "learning_rate": 6.253310717430468e-06, "loss": 1.4799, "step": 38640 }, { "epoch": 1.1245891148151381, "grad_norm": 11.25, "learning_rate": 6.251371435757853e-06, "loss": 1.4928, "step": 38660 }, { "epoch": 1.1251708991476859, "grad_norm": 12.6875, "learning_rate": 6.249432154085238e-06, "loss": 1.5306, "step": 38680 }, { "epoch": 1.1257526834802338, "grad_norm": 13.75, "learning_rate": 6.247492872412623e-06, "loss": 1.4904, "step": 38700 }, { "epoch": 1.1263344678127818, "grad_norm": 12.875, "learning_rate": 6.245553590740008e-06, "loss": 1.5334, "step": 38720 }, { "epoch": 1.1269162521453298, "grad_norm": 13.375, "learning_rate": 6.2436143090673924e-06, "loss": 1.5545, "step": 38740 }, { "epoch": 1.1274980364778777, "grad_norm": 14.3125, "learning_rate": 6.2416750273947775e-06, "loss": 1.5655, "step": 38760 }, { "epoch": 1.1280798208104255, "grad_norm": 17.0, "learning_rate": 6.239735745722163e-06, "loss": 1.4399, "step": 38780 }, { "epoch": 1.1286616051429734, "grad_norm": 13.125, "learning_rate": 6.237796464049548e-06, "loss": 1.5477, "step": 38800 }, { "epoch": 1.1292433894755214, "grad_norm": 12.8125, "learning_rate": 6.235857182376933e-06, "loss": 1.5395, "step": 38820 }, { "epoch": 1.1298251738080693, "grad_norm": 12.0, "learning_rate": 6.233917900704318e-06, "loss": 1.5085, "step": 38840 }, { "epoch": 1.1304069581406173, "grad_norm": 17.375, "learning_rate": 6.231978619031703e-06, "loss": 1.5742, "step": 38860 }, { "epoch": 1.1309887424731653, "grad_norm": 12.6875, "learning_rate": 6.230039337359088e-06, "loss": 1.4685, "step": 38880 }, { "epoch": 1.1315705268057132, "grad_norm": 12.4375, "learning_rate": 6.228100055686473e-06, "loss": 1.593, "step": 38900 }, { "epoch": 1.132152311138261, "grad_norm": 12.8125, "learning_rate": 6.226160774013858e-06, "loss": 1.5004, "step": 38920 }, { "epoch": 1.132734095470809, "grad_norm": 13.9375, "learning_rate": 6.224221492341243e-06, "loss": 1.5268, "step": 38940 }, { "epoch": 1.133315879803357, "grad_norm": 12.625, "learning_rate": 6.2222822106686285e-06, "loss": 1.5868, "step": 38960 }, { "epoch": 1.1338976641359049, "grad_norm": 17.125, "learning_rate": 6.220342928996014e-06, "loss": 1.538, "step": 38980 }, { "epoch": 1.1344794484684528, "grad_norm": 12.5625, "learning_rate": 6.218403647323399e-06, "loss": 1.4494, "step": 39000 }, { "epoch": 1.1350612328010006, "grad_norm": 13.5, "learning_rate": 6.216464365650784e-06, "loss": 1.5108, "step": 39020 }, { "epoch": 1.1356430171335485, "grad_norm": 14.25, "learning_rate": 6.214525083978169e-06, "loss": 1.4658, "step": 39040 }, { "epoch": 1.1362248014660965, "grad_norm": 12.25, "learning_rate": 6.212585802305554e-06, "loss": 1.5474, "step": 39060 }, { "epoch": 1.1368065857986445, "grad_norm": 13.75, "learning_rate": 6.210646520632939e-06, "loss": 1.5372, "step": 39080 }, { "epoch": 1.1373883701311924, "grad_norm": 13.125, "learning_rate": 6.208707238960324e-06, "loss": 1.3963, "step": 39100 }, { "epoch": 1.1379701544637402, "grad_norm": 14.0, "learning_rate": 6.206767957287709e-06, "loss": 1.527, "step": 39120 }, { "epoch": 1.1385519387962881, "grad_norm": 12.4375, "learning_rate": 6.204828675615094e-06, "loss": 1.5576, "step": 39140 }, { "epoch": 1.139133723128836, "grad_norm": 12.5625, "learning_rate": 6.2028893939424795e-06, "loss": 1.5017, "step": 39160 }, { "epoch": 1.139715507461384, "grad_norm": 13.4375, "learning_rate": 6.2009501122698646e-06, "loss": 1.5192, "step": 39180 }, { "epoch": 1.140297291793932, "grad_norm": 11.75, "learning_rate": 6.19901083059725e-06, "loss": 1.4632, "step": 39200 }, { "epoch": 1.14087907612648, "grad_norm": 14.5625, "learning_rate": 6.197071548924635e-06, "loss": 1.5165, "step": 39220 }, { "epoch": 1.141460860459028, "grad_norm": 13.5, "learning_rate": 6.19513226725202e-06, "loss": 1.5024, "step": 39240 }, { "epoch": 1.1420426447915757, "grad_norm": 15.8125, "learning_rate": 6.193192985579405e-06, "loss": 1.4939, "step": 39260 }, { "epoch": 1.1426244291241237, "grad_norm": 12.125, "learning_rate": 6.19125370390679e-06, "loss": 1.5697, "step": 39280 }, { "epoch": 1.1432062134566716, "grad_norm": 14.4375, "learning_rate": 6.189314422234175e-06, "loss": 1.4966, "step": 39300 }, { "epoch": 1.1437879977892196, "grad_norm": 13.0625, "learning_rate": 6.18737514056156e-06, "loss": 1.4836, "step": 39320 }, { "epoch": 1.1443697821217675, "grad_norm": 10.4375, "learning_rate": 6.185435858888945e-06, "loss": 1.5408, "step": 39340 }, { "epoch": 1.1449515664543153, "grad_norm": 13.6875, "learning_rate": 6.1834965772163305e-06, "loss": 1.3676, "step": 39360 }, { "epoch": 1.1455333507868632, "grad_norm": 11.1875, "learning_rate": 6.1815572955437156e-06, "loss": 1.5173, "step": 39380 }, { "epoch": 1.1461151351194112, "grad_norm": 10.6875, "learning_rate": 6.179618013871101e-06, "loss": 1.4561, "step": 39400 }, { "epoch": 1.1466969194519592, "grad_norm": 11.1875, "learning_rate": 6.177678732198486e-06, "loss": 1.4594, "step": 39420 }, { "epoch": 1.1472787037845071, "grad_norm": 15.375, "learning_rate": 6.175739450525871e-06, "loss": 1.5095, "step": 39440 }, { "epoch": 1.147860488117055, "grad_norm": 13.5625, "learning_rate": 6.173800168853256e-06, "loss": 1.5298, "step": 39460 }, { "epoch": 1.1484422724496028, "grad_norm": 12.75, "learning_rate": 6.171860887180641e-06, "loss": 1.4985, "step": 39480 }, { "epoch": 1.1490240567821508, "grad_norm": 11.4375, "learning_rate": 6.169921605508026e-06, "loss": 1.5058, "step": 39500 }, { "epoch": 1.1496058411146988, "grad_norm": 14.4375, "learning_rate": 6.167982323835411e-06, "loss": 1.5532, "step": 39520 }, { "epoch": 1.1501876254472467, "grad_norm": 13.25, "learning_rate": 6.166043042162796e-06, "loss": 1.5766, "step": 39540 }, { "epoch": 1.1507694097797947, "grad_norm": 15.5, "learning_rate": 6.164103760490181e-06, "loss": 1.5411, "step": 39560 }, { "epoch": 1.1513511941123427, "grad_norm": 15.5625, "learning_rate": 6.162164478817566e-06, "loss": 1.6242, "step": 39580 }, { "epoch": 1.1519329784448904, "grad_norm": 10.4375, "learning_rate": 6.160225197144951e-06, "loss": 1.4599, "step": 39600 }, { "epoch": 1.1525147627774384, "grad_norm": 10.5, "learning_rate": 6.158285915472336e-06, "loss": 1.5049, "step": 39620 }, { "epoch": 1.1530965471099863, "grad_norm": 13.875, "learning_rate": 6.156346633799721e-06, "loss": 1.5083, "step": 39640 }, { "epoch": 1.1536783314425343, "grad_norm": 15.5625, "learning_rate": 6.154407352127106e-06, "loss": 1.578, "step": 39660 }, { "epoch": 1.1542601157750823, "grad_norm": 13.3125, "learning_rate": 6.152468070454491e-06, "loss": 1.5239, "step": 39680 }, { "epoch": 1.15484190010763, "grad_norm": 16.25, "learning_rate": 6.150528788781876e-06, "loss": 1.5221, "step": 39700 }, { "epoch": 1.155423684440178, "grad_norm": 12.25, "learning_rate": 6.148589507109261e-06, "loss": 1.5173, "step": 39720 }, { "epoch": 1.156005468772726, "grad_norm": 12.25, "learning_rate": 6.1466502254366465e-06, "loss": 1.5572, "step": 39740 }, { "epoch": 1.1565872531052739, "grad_norm": 12.5625, "learning_rate": 6.1447109437640316e-06, "loss": 1.5101, "step": 39760 }, { "epoch": 1.1571690374378218, "grad_norm": 12.6875, "learning_rate": 6.142771662091417e-06, "loss": 1.4834, "step": 39780 }, { "epoch": 1.1577508217703698, "grad_norm": 11.0, "learning_rate": 6.140832380418802e-06, "loss": 1.42, "step": 39800 }, { "epoch": 1.1583326061029178, "grad_norm": 12.875, "learning_rate": 6.138893098746187e-06, "loss": 1.5027, "step": 39820 }, { "epoch": 1.1589143904354655, "grad_norm": 13.0625, "learning_rate": 6.136953817073572e-06, "loss": 1.4606, "step": 39840 }, { "epoch": 1.1594961747680135, "grad_norm": 15.1875, "learning_rate": 6.135014535400957e-06, "loss": 1.4864, "step": 39860 }, { "epoch": 1.1600779591005614, "grad_norm": 12.4375, "learning_rate": 6.133075253728342e-06, "loss": 1.5325, "step": 39880 }, { "epoch": 1.1606597434331094, "grad_norm": 13.625, "learning_rate": 6.131135972055727e-06, "loss": 1.6125, "step": 39900 }, { "epoch": 1.1612415277656574, "grad_norm": 13.5, "learning_rate": 6.129196690383112e-06, "loss": 1.4932, "step": 39920 }, { "epoch": 1.161823312098205, "grad_norm": 15.75, "learning_rate": 6.1272574087104974e-06, "loss": 1.5064, "step": 39940 }, { "epoch": 1.162405096430753, "grad_norm": 13.6875, "learning_rate": 6.1253181270378825e-06, "loss": 1.4966, "step": 39960 }, { "epoch": 1.162986880763301, "grad_norm": 14.75, "learning_rate": 6.123378845365268e-06, "loss": 1.5413, "step": 39980 }, { "epoch": 1.163568665095849, "grad_norm": 16.75, "learning_rate": 6.121439563692653e-06, "loss": 1.4682, "step": 40000 }, { "epoch": 1.164150449428397, "grad_norm": 13.5625, "learning_rate": 6.119500282020038e-06, "loss": 1.4839, "step": 40020 }, { "epoch": 1.1647322337609447, "grad_norm": 14.25, "learning_rate": 6.117561000347423e-06, "loss": 1.5623, "step": 40040 }, { "epoch": 1.1653140180934927, "grad_norm": 12.25, "learning_rate": 6.115621718674808e-06, "loss": 1.4892, "step": 40060 }, { "epoch": 1.1658958024260406, "grad_norm": 12.3125, "learning_rate": 6.113682437002193e-06, "loss": 1.5352, "step": 40080 }, { "epoch": 1.1664775867585886, "grad_norm": 12.375, "learning_rate": 6.111743155329578e-06, "loss": 1.5495, "step": 40100 }, { "epoch": 1.1670593710911366, "grad_norm": 13.375, "learning_rate": 6.109803873656963e-06, "loss": 1.4987, "step": 40120 }, { "epoch": 1.1676411554236845, "grad_norm": 14.0625, "learning_rate": 6.107864591984348e-06, "loss": 1.5625, "step": 40140 }, { "epoch": 1.1682229397562325, "grad_norm": 14.125, "learning_rate": 6.1059253103117335e-06, "loss": 1.5876, "step": 40160 }, { "epoch": 1.1688047240887802, "grad_norm": 11.75, "learning_rate": 6.103986028639119e-06, "loss": 1.5188, "step": 40180 }, { "epoch": 1.1693865084213282, "grad_norm": 14.4375, "learning_rate": 6.102046746966504e-06, "loss": 1.5524, "step": 40200 }, { "epoch": 1.1699682927538761, "grad_norm": 16.375, "learning_rate": 6.100107465293889e-06, "loss": 1.488, "step": 40220 }, { "epoch": 1.1705500770864241, "grad_norm": 12.5, "learning_rate": 6.098168183621274e-06, "loss": 1.4927, "step": 40240 }, { "epoch": 1.171131861418972, "grad_norm": 10.6875, "learning_rate": 6.096228901948659e-06, "loss": 1.493, "step": 40260 }, { "epoch": 1.1717136457515198, "grad_norm": 11.1875, "learning_rate": 6.094289620276044e-06, "loss": 1.4916, "step": 40280 }, { "epoch": 1.1722954300840678, "grad_norm": 13.4375, "learning_rate": 6.092350338603429e-06, "loss": 1.5093, "step": 40300 }, { "epoch": 1.1728772144166157, "grad_norm": 12.5625, "learning_rate": 6.090411056930814e-06, "loss": 1.4595, "step": 40320 }, { "epoch": 1.1734589987491637, "grad_norm": 15.875, "learning_rate": 6.088471775258199e-06, "loss": 1.6267, "step": 40340 }, { "epoch": 1.1740407830817117, "grad_norm": 13.875, "learning_rate": 6.0865324935855845e-06, "loss": 1.5344, "step": 40360 }, { "epoch": 1.1746225674142594, "grad_norm": 14.3125, "learning_rate": 6.084593211912969e-06, "loss": 1.5923, "step": 40380 }, { "epoch": 1.1752043517468074, "grad_norm": 13.5, "learning_rate": 6.082653930240354e-06, "loss": 1.4998, "step": 40400 }, { "epoch": 1.1757861360793553, "grad_norm": 13.5625, "learning_rate": 6.080714648567739e-06, "loss": 1.4985, "step": 40420 }, { "epoch": 1.1763679204119033, "grad_norm": 14.875, "learning_rate": 6.078775366895124e-06, "loss": 1.4674, "step": 40440 }, { "epoch": 1.1769497047444513, "grad_norm": 13.6875, "learning_rate": 6.076836085222509e-06, "loss": 1.5184, "step": 40460 }, { "epoch": 1.1775314890769992, "grad_norm": 14.5, "learning_rate": 6.074896803549894e-06, "loss": 1.4749, "step": 40480 }, { "epoch": 1.1781132734095472, "grad_norm": 13.375, "learning_rate": 6.072957521877279e-06, "loss": 1.6339, "step": 40500 }, { "epoch": 1.178695057742095, "grad_norm": 12.5625, "learning_rate": 6.071018240204664e-06, "loss": 1.4874, "step": 40520 }, { "epoch": 1.179276842074643, "grad_norm": 12.0625, "learning_rate": 6.0690789585320495e-06, "loss": 1.5409, "step": 40540 }, { "epoch": 1.1798586264071909, "grad_norm": 10.75, "learning_rate": 6.067139676859435e-06, "loss": 1.6283, "step": 40560 }, { "epoch": 1.1804404107397388, "grad_norm": 12.0, "learning_rate": 6.06520039518682e-06, "loss": 1.5583, "step": 40580 }, { "epoch": 1.1810221950722868, "grad_norm": 11.25, "learning_rate": 6.063261113514205e-06, "loss": 1.5938, "step": 40600 }, { "epoch": 1.1816039794048345, "grad_norm": 12.4375, "learning_rate": 6.06132183184159e-06, "loss": 1.5019, "step": 40620 }, { "epoch": 1.1821857637373825, "grad_norm": 13.875, "learning_rate": 6.059382550168975e-06, "loss": 1.585, "step": 40640 }, { "epoch": 1.1827675480699305, "grad_norm": 11.0625, "learning_rate": 6.05744326849636e-06, "loss": 1.5273, "step": 40660 }, { "epoch": 1.1833493324024784, "grad_norm": 10.5625, "learning_rate": 6.055503986823745e-06, "loss": 1.5024, "step": 40680 }, { "epoch": 1.1839311167350264, "grad_norm": 14.8125, "learning_rate": 6.05356470515113e-06, "loss": 1.5544, "step": 40700 }, { "epoch": 1.1845129010675743, "grad_norm": 12.9375, "learning_rate": 6.051625423478515e-06, "loss": 1.4653, "step": 40720 }, { "epoch": 1.185094685400122, "grad_norm": 12.125, "learning_rate": 6.0496861418059005e-06, "loss": 1.5179, "step": 40740 }, { "epoch": 1.18567646973267, "grad_norm": 11.4375, "learning_rate": 6.047746860133286e-06, "loss": 1.5144, "step": 40760 }, { "epoch": 1.186258254065218, "grad_norm": 8.75, "learning_rate": 6.045807578460671e-06, "loss": 1.4804, "step": 40780 }, { "epoch": 1.186840038397766, "grad_norm": 11.125, "learning_rate": 6.043868296788056e-06, "loss": 1.5792, "step": 40800 }, { "epoch": 1.187421822730314, "grad_norm": 13.875, "learning_rate": 6.041929015115441e-06, "loss": 1.4975, "step": 40820 }, { "epoch": 1.188003607062862, "grad_norm": 13.1875, "learning_rate": 6.039989733442826e-06, "loss": 1.5481, "step": 40840 }, { "epoch": 1.1885853913954096, "grad_norm": 13.5, "learning_rate": 6.038050451770211e-06, "loss": 1.4825, "step": 40860 }, { "epoch": 1.1891671757279576, "grad_norm": 12.8125, "learning_rate": 6.036111170097596e-06, "loss": 1.4976, "step": 40880 }, { "epoch": 1.1897489600605056, "grad_norm": 13.5, "learning_rate": 6.034171888424981e-06, "loss": 1.4153, "step": 40900 }, { "epoch": 1.1903307443930535, "grad_norm": 12.875, "learning_rate": 6.032232606752366e-06, "loss": 1.5186, "step": 40920 }, { "epoch": 1.1909125287256015, "grad_norm": 14.6875, "learning_rate": 6.0302933250797515e-06, "loss": 1.5677, "step": 40940 }, { "epoch": 1.1914943130581492, "grad_norm": 13.5625, "learning_rate": 6.0283540434071366e-06, "loss": 1.5399, "step": 40960 }, { "epoch": 1.1920760973906972, "grad_norm": 11.875, "learning_rate": 6.026414761734522e-06, "loss": 1.543, "step": 40980 }, { "epoch": 1.1926578817232452, "grad_norm": 12.625, "learning_rate": 6.024475480061907e-06, "loss": 1.5075, "step": 41000 }, { "epoch": 1.1932396660557931, "grad_norm": 15.3125, "learning_rate": 6.022536198389292e-06, "loss": 1.519, "step": 41020 }, { "epoch": 1.193821450388341, "grad_norm": 12.6875, "learning_rate": 6.020596916716677e-06, "loss": 1.4778, "step": 41040 }, { "epoch": 1.194403234720889, "grad_norm": 11.4375, "learning_rate": 6.018657635044062e-06, "loss": 1.4882, "step": 41060 }, { "epoch": 1.194985019053437, "grad_norm": 15.0625, "learning_rate": 6.016718353371447e-06, "loss": 1.5328, "step": 41080 }, { "epoch": 1.1955668033859848, "grad_norm": 13.375, "learning_rate": 6.014779071698832e-06, "loss": 1.4973, "step": 41100 }, { "epoch": 1.1961485877185327, "grad_norm": 14.875, "learning_rate": 6.012839790026217e-06, "loss": 1.5371, "step": 41120 }, { "epoch": 1.1967303720510807, "grad_norm": 13.1875, "learning_rate": 6.0109005083536024e-06, "loss": 1.5427, "step": 41140 }, { "epoch": 1.1973121563836286, "grad_norm": 13.0625, "learning_rate": 6.0089612266809875e-06, "loss": 1.4804, "step": 41160 }, { "epoch": 1.1978939407161766, "grad_norm": 15.5625, "learning_rate": 6.007021945008373e-06, "loss": 1.5384, "step": 41180 }, { "epoch": 1.1984757250487243, "grad_norm": 12.625, "learning_rate": 6.005082663335757e-06, "loss": 1.5363, "step": 41200 }, { "epoch": 1.1990575093812723, "grad_norm": 13.0, "learning_rate": 6.003143381663142e-06, "loss": 1.5551, "step": 41220 }, { "epoch": 1.1996392937138203, "grad_norm": 12.5, "learning_rate": 6.001204099990527e-06, "loss": 1.5575, "step": 41240 }, { "epoch": 1.2002210780463682, "grad_norm": 13.0, "learning_rate": 5.999264818317912e-06, "loss": 1.586, "step": 41260 }, { "epoch": 1.2008028623789162, "grad_norm": 12.8125, "learning_rate": 5.997325536645297e-06, "loss": 1.569, "step": 41280 }, { "epoch": 1.201384646711464, "grad_norm": 12.1875, "learning_rate": 5.995386254972682e-06, "loss": 1.5458, "step": 41300 }, { "epoch": 1.201966431044012, "grad_norm": 13.4375, "learning_rate": 5.9934469733000675e-06, "loss": 1.523, "step": 41320 }, { "epoch": 1.2025482153765599, "grad_norm": 13.9375, "learning_rate": 5.9915076916274526e-06, "loss": 1.5145, "step": 41340 }, { "epoch": 1.2031299997091078, "grad_norm": 12.875, "learning_rate": 5.989568409954838e-06, "loss": 1.5263, "step": 41360 }, { "epoch": 1.2037117840416558, "grad_norm": 13.0, "learning_rate": 5.987629128282223e-06, "loss": 1.606, "step": 41380 }, { "epoch": 1.2042935683742038, "grad_norm": 12.5, "learning_rate": 5.985689846609608e-06, "loss": 1.5428, "step": 41400 }, { "epoch": 1.2048753527067517, "grad_norm": 12.75, "learning_rate": 5.983750564936993e-06, "loss": 1.4788, "step": 41420 }, { "epoch": 1.2054571370392995, "grad_norm": 13.875, "learning_rate": 5.981811283264378e-06, "loss": 1.5216, "step": 41440 }, { "epoch": 1.2060389213718474, "grad_norm": 15.0625, "learning_rate": 5.979872001591763e-06, "loss": 1.606, "step": 41460 }, { "epoch": 1.2066207057043954, "grad_norm": 12.875, "learning_rate": 5.977932719919148e-06, "loss": 1.4665, "step": 41480 }, { "epoch": 1.2072024900369434, "grad_norm": 13.5, "learning_rate": 5.975993438246533e-06, "loss": 1.5042, "step": 41500 }, { "epoch": 1.2077842743694913, "grad_norm": 14.4375, "learning_rate": 5.9740541565739184e-06, "loss": 1.5428, "step": 41520 }, { "epoch": 1.208366058702039, "grad_norm": 14.625, "learning_rate": 5.9721148749013035e-06, "loss": 1.5344, "step": 41540 }, { "epoch": 1.208947843034587, "grad_norm": 14.4375, "learning_rate": 5.970175593228689e-06, "loss": 1.5693, "step": 41560 }, { "epoch": 1.209529627367135, "grad_norm": 14.875, "learning_rate": 5.968236311556074e-06, "loss": 1.4785, "step": 41580 }, { "epoch": 1.210111411699683, "grad_norm": 14.375, "learning_rate": 5.966297029883459e-06, "loss": 1.6008, "step": 41600 }, { "epoch": 1.210693196032231, "grad_norm": 14.3125, "learning_rate": 5.964357748210844e-06, "loss": 1.4983, "step": 41620 }, { "epoch": 1.2112749803647787, "grad_norm": 14.8125, "learning_rate": 5.962418466538229e-06, "loss": 1.5593, "step": 41640 }, { "epoch": 1.2118567646973266, "grad_norm": 12.875, "learning_rate": 5.960479184865614e-06, "loss": 1.4954, "step": 41660 }, { "epoch": 1.2124385490298746, "grad_norm": 13.1875, "learning_rate": 5.958539903192999e-06, "loss": 1.4851, "step": 41680 }, { "epoch": 1.2130203333624225, "grad_norm": 13.5, "learning_rate": 5.956600621520384e-06, "loss": 1.4913, "step": 41700 }, { "epoch": 1.2136021176949705, "grad_norm": 12.625, "learning_rate": 5.954661339847769e-06, "loss": 1.5791, "step": 41720 }, { "epoch": 1.2141839020275185, "grad_norm": 12.625, "learning_rate": 5.9527220581751545e-06, "loss": 1.5684, "step": 41740 }, { "epoch": 1.2147656863600664, "grad_norm": 12.75, "learning_rate": 5.95078277650254e-06, "loss": 1.5621, "step": 41760 }, { "epoch": 1.2153474706926142, "grad_norm": 13.5, "learning_rate": 5.948843494829925e-06, "loss": 1.5606, "step": 41780 }, { "epoch": 1.2159292550251621, "grad_norm": 13.3125, "learning_rate": 5.946904213157308e-06, "loss": 1.5274, "step": 41800 }, { "epoch": 1.21651103935771, "grad_norm": 14.375, "learning_rate": 5.944964931484693e-06, "loss": 1.4346, "step": 41820 }, { "epoch": 1.217092823690258, "grad_norm": 10.0, "learning_rate": 5.943025649812078e-06, "loss": 1.4874, "step": 41840 }, { "epoch": 1.217674608022806, "grad_norm": 13.3125, "learning_rate": 5.941086368139463e-06, "loss": 1.5017, "step": 41860 }, { "epoch": 1.2182563923553538, "grad_norm": 16.125, "learning_rate": 5.9391470864668485e-06, "loss": 1.4754, "step": 41880 }, { "epoch": 1.2188381766879017, "grad_norm": 14.0625, "learning_rate": 5.937207804794234e-06, "loss": 1.5927, "step": 41900 }, { "epoch": 1.2194199610204497, "grad_norm": 16.125, "learning_rate": 5.935268523121619e-06, "loss": 1.4845, "step": 41920 }, { "epoch": 1.2200017453529977, "grad_norm": 13.3125, "learning_rate": 5.933329241449004e-06, "loss": 1.53, "step": 41940 }, { "epoch": 1.2205835296855456, "grad_norm": 14.0625, "learning_rate": 5.931389959776389e-06, "loss": 1.5277, "step": 41960 }, { "epoch": 1.2211653140180936, "grad_norm": 12.75, "learning_rate": 5.929450678103774e-06, "loss": 1.5369, "step": 41980 }, { "epoch": 1.2217470983506413, "grad_norm": 12.3125, "learning_rate": 5.927511396431159e-06, "loss": 1.5688, "step": 42000 }, { "epoch": 1.2223288826831893, "grad_norm": 12.3125, "learning_rate": 5.925572114758544e-06, "loss": 1.5271, "step": 42020 }, { "epoch": 1.2229106670157373, "grad_norm": 11.9375, "learning_rate": 5.923632833085929e-06, "loss": 1.5179, "step": 42040 }, { "epoch": 1.2234924513482852, "grad_norm": 14.9375, "learning_rate": 5.921693551413314e-06, "loss": 1.5587, "step": 42060 }, { "epoch": 1.2240742356808332, "grad_norm": 13.1875, "learning_rate": 5.9197542697406995e-06, "loss": 1.5901, "step": 42080 }, { "epoch": 1.2246560200133811, "grad_norm": 11.625, "learning_rate": 5.9178149880680846e-06, "loss": 1.5209, "step": 42100 }, { "epoch": 1.2252378043459289, "grad_norm": 12.9375, "learning_rate": 5.91587570639547e-06, "loss": 1.5068, "step": 42120 }, { "epoch": 1.2258195886784768, "grad_norm": 15.375, "learning_rate": 5.913936424722855e-06, "loss": 1.4771, "step": 42140 }, { "epoch": 1.2264013730110248, "grad_norm": 13.8125, "learning_rate": 5.91199714305024e-06, "loss": 1.532, "step": 42160 }, { "epoch": 1.2269831573435728, "grad_norm": 13.0625, "learning_rate": 5.910057861377625e-06, "loss": 1.5224, "step": 42180 }, { "epoch": 1.2275649416761207, "grad_norm": 14.375, "learning_rate": 5.90811857970501e-06, "loss": 1.5713, "step": 42200 }, { "epoch": 1.2281467260086685, "grad_norm": 13.125, "learning_rate": 5.906179298032395e-06, "loss": 1.6152, "step": 42220 }, { "epoch": 1.2287285103412164, "grad_norm": 10.125, "learning_rate": 5.90424001635978e-06, "loss": 1.5322, "step": 42240 }, { "epoch": 1.2293102946737644, "grad_norm": 11.9375, "learning_rate": 5.902300734687165e-06, "loss": 1.4988, "step": 42260 }, { "epoch": 1.2298920790063124, "grad_norm": 17.375, "learning_rate": 5.9003614530145504e-06, "loss": 1.5966, "step": 42280 }, { "epoch": 1.2304738633388603, "grad_norm": 10.5, "learning_rate": 5.8984221713419355e-06, "loss": 1.5006, "step": 42300 }, { "epoch": 1.2310556476714083, "grad_norm": 12.6875, "learning_rate": 5.896482889669321e-06, "loss": 1.5911, "step": 42320 }, { "epoch": 1.2316374320039563, "grad_norm": 14.9375, "learning_rate": 5.894543607996706e-06, "loss": 1.5205, "step": 42340 }, { "epoch": 1.232219216336504, "grad_norm": 12.0625, "learning_rate": 5.892604326324091e-06, "loss": 1.4876, "step": 42360 }, { "epoch": 1.232801000669052, "grad_norm": 13.5, "learning_rate": 5.890665044651476e-06, "loss": 1.5186, "step": 42380 }, { "epoch": 1.2333827850016, "grad_norm": 12.875, "learning_rate": 5.888725762978861e-06, "loss": 1.4763, "step": 42400 }, { "epoch": 1.2339645693341479, "grad_norm": 14.125, "learning_rate": 5.886786481306246e-06, "loss": 1.5486, "step": 42420 }, { "epoch": 1.2345463536666959, "grad_norm": 12.875, "learning_rate": 5.884847199633631e-06, "loss": 1.4882, "step": 42440 }, { "epoch": 1.2351281379992436, "grad_norm": 12.6875, "learning_rate": 5.882907917961016e-06, "loss": 1.5763, "step": 42460 }, { "epoch": 1.2357099223317916, "grad_norm": 13.4375, "learning_rate": 5.8809686362884014e-06, "loss": 1.452, "step": 42480 }, { "epoch": 1.2362917066643395, "grad_norm": 14.5625, "learning_rate": 5.8790293546157865e-06, "loss": 1.5535, "step": 42500 }, { "epoch": 1.2368734909968875, "grad_norm": 14.8125, "learning_rate": 5.877090072943172e-06, "loss": 1.5531, "step": 42520 }, { "epoch": 1.2374552753294354, "grad_norm": 12.375, "learning_rate": 5.875150791270557e-06, "loss": 1.4908, "step": 42540 }, { "epoch": 1.2380370596619832, "grad_norm": 14.0, "learning_rate": 5.873211509597942e-06, "loss": 1.4919, "step": 42560 }, { "epoch": 1.2386188439945311, "grad_norm": 12.9375, "learning_rate": 5.871272227925326e-06, "loss": 1.4404, "step": 42580 }, { "epoch": 1.2392006283270791, "grad_norm": 13.9375, "learning_rate": 5.869332946252711e-06, "loss": 1.4845, "step": 42600 }, { "epoch": 1.239782412659627, "grad_norm": 12.1875, "learning_rate": 5.867393664580096e-06, "loss": 1.5165, "step": 42620 }, { "epoch": 1.240364196992175, "grad_norm": 13.0, "learning_rate": 5.865454382907481e-06, "loss": 1.5542, "step": 42640 }, { "epoch": 1.240945981324723, "grad_norm": 12.375, "learning_rate": 5.8635151012348665e-06, "loss": 1.4909, "step": 42660 }, { "epoch": 1.241527765657271, "grad_norm": 12.375, "learning_rate": 5.8615758195622516e-06, "loss": 1.5737, "step": 42680 }, { "epoch": 1.2421095499898187, "grad_norm": 12.1875, "learning_rate": 5.859636537889637e-06, "loss": 1.5644, "step": 42700 }, { "epoch": 1.2426913343223667, "grad_norm": 13.625, "learning_rate": 5.857697256217022e-06, "loss": 1.596, "step": 42720 }, { "epoch": 1.2432731186549146, "grad_norm": 11.5625, "learning_rate": 5.855757974544407e-06, "loss": 1.4895, "step": 42740 }, { "epoch": 1.2438549029874626, "grad_norm": 14.5, "learning_rate": 5.853818692871792e-06, "loss": 1.5701, "step": 42760 }, { "epoch": 1.2444366873200106, "grad_norm": 14.9375, "learning_rate": 5.851879411199177e-06, "loss": 1.5158, "step": 42780 }, { "epoch": 1.2450184716525583, "grad_norm": 14.8125, "learning_rate": 5.849940129526562e-06, "loss": 1.5914, "step": 42800 }, { "epoch": 1.2456002559851063, "grad_norm": 11.5625, "learning_rate": 5.848000847853947e-06, "loss": 1.5527, "step": 42820 }, { "epoch": 1.2461820403176542, "grad_norm": 13.625, "learning_rate": 5.846061566181332e-06, "loss": 1.5446, "step": 42840 }, { "epoch": 1.2467638246502022, "grad_norm": 12.625, "learning_rate": 5.8441222845087174e-06, "loss": 1.4778, "step": 42860 }, { "epoch": 1.2473456089827502, "grad_norm": 15.875, "learning_rate": 5.8421830028361025e-06, "loss": 1.6255, "step": 42880 }, { "epoch": 1.247927393315298, "grad_norm": 14.125, "learning_rate": 5.840243721163488e-06, "loss": 1.5588, "step": 42900 }, { "epoch": 1.2485091776478459, "grad_norm": 13.125, "learning_rate": 5.838304439490873e-06, "loss": 1.5103, "step": 42920 }, { "epoch": 1.2490909619803938, "grad_norm": 14.9375, "learning_rate": 5.836365157818258e-06, "loss": 1.5243, "step": 42940 }, { "epoch": 1.2496727463129418, "grad_norm": 13.125, "learning_rate": 5.834425876145643e-06, "loss": 1.5647, "step": 42960 }, { "epoch": 1.2502545306454897, "grad_norm": 12.375, "learning_rate": 5.832486594473028e-06, "loss": 1.5204, "step": 42980 }, { "epoch": 1.2508363149780377, "grad_norm": 11.1875, "learning_rate": 5.830547312800413e-06, "loss": 1.5199, "step": 43000 }, { "epoch": 1.2514180993105857, "grad_norm": 13.1875, "learning_rate": 5.828608031127798e-06, "loss": 1.5245, "step": 43020 }, { "epoch": 1.2519998836431334, "grad_norm": 13.3125, "learning_rate": 5.826668749455183e-06, "loss": 1.5144, "step": 43040 }, { "epoch": 1.2525816679756814, "grad_norm": 13.4375, "learning_rate": 5.824729467782568e-06, "loss": 1.5476, "step": 43060 }, { "epoch": 1.2531634523082293, "grad_norm": 13.625, "learning_rate": 5.8227901861099535e-06, "loss": 1.4843, "step": 43080 }, { "epoch": 1.2537452366407773, "grad_norm": 14.125, "learning_rate": 5.820850904437339e-06, "loss": 1.4631, "step": 43100 }, { "epoch": 1.2543270209733253, "grad_norm": 13.25, "learning_rate": 5.818911622764724e-06, "loss": 1.5837, "step": 43120 }, { "epoch": 1.254908805305873, "grad_norm": 15.0625, "learning_rate": 5.816972341092109e-06, "loss": 1.4783, "step": 43140 }, { "epoch": 1.255490589638421, "grad_norm": 11.625, "learning_rate": 5.815033059419494e-06, "loss": 1.5691, "step": 43160 }, { "epoch": 1.256072373970969, "grad_norm": 12.8125, "learning_rate": 5.813093777746879e-06, "loss": 1.5289, "step": 43180 }, { "epoch": 1.256654158303517, "grad_norm": 15.625, "learning_rate": 5.811154496074264e-06, "loss": 1.5228, "step": 43200 }, { "epoch": 1.2572359426360649, "grad_norm": 15.9375, "learning_rate": 5.809215214401649e-06, "loss": 1.5698, "step": 43220 }, { "epoch": 1.2578177269686126, "grad_norm": 13.6875, "learning_rate": 5.807275932729034e-06, "loss": 1.511, "step": 43240 }, { "epoch": 1.2583995113011608, "grad_norm": 13.375, "learning_rate": 5.805336651056419e-06, "loss": 1.599, "step": 43260 }, { "epoch": 1.2589812956337085, "grad_norm": 11.875, "learning_rate": 5.8033973693838045e-06, "loss": 1.4926, "step": 43280 }, { "epoch": 1.2595630799662565, "grad_norm": 13.9375, "learning_rate": 5.8014580877111896e-06, "loss": 1.537, "step": 43300 }, { "epoch": 1.2601448642988045, "grad_norm": 16.125, "learning_rate": 5.799518806038575e-06, "loss": 1.4502, "step": 43320 }, { "epoch": 1.2607266486313524, "grad_norm": 14.25, "learning_rate": 5.79757952436596e-06, "loss": 1.4721, "step": 43340 }, { "epoch": 1.2613084329639004, "grad_norm": 12.5, "learning_rate": 5.795640242693345e-06, "loss": 1.537, "step": 43360 }, { "epoch": 1.2618902172964481, "grad_norm": 15.125, "learning_rate": 5.79370096102073e-06, "loss": 1.538, "step": 43380 }, { "epoch": 1.262472001628996, "grad_norm": 14.0625, "learning_rate": 5.791761679348114e-06, "loss": 1.5307, "step": 43400 }, { "epoch": 1.263053785961544, "grad_norm": 14.3125, "learning_rate": 5.789822397675499e-06, "loss": 1.5407, "step": 43420 }, { "epoch": 1.263635570294092, "grad_norm": 11.5, "learning_rate": 5.787883116002884e-06, "loss": 1.5532, "step": 43440 }, { "epoch": 1.26421735462664, "grad_norm": 11.5, "learning_rate": 5.7859438343302695e-06, "loss": 1.6063, "step": 43460 }, { "epoch": 1.2647991389591877, "grad_norm": 13.5, "learning_rate": 5.784004552657655e-06, "loss": 1.5031, "step": 43480 }, { "epoch": 1.2653809232917357, "grad_norm": 13.25, "learning_rate": 5.78206527098504e-06, "loss": 1.5268, "step": 43500 }, { "epoch": 1.2659627076242836, "grad_norm": 12.0625, "learning_rate": 5.780125989312425e-06, "loss": 1.4692, "step": 43520 }, { "epoch": 1.2665444919568316, "grad_norm": 14.25, "learning_rate": 5.77818670763981e-06, "loss": 1.4345, "step": 43540 }, { "epoch": 1.2671262762893796, "grad_norm": 13.375, "learning_rate": 5.776247425967195e-06, "loss": 1.4452, "step": 43560 }, { "epoch": 1.2677080606219273, "grad_norm": 9.8125, "learning_rate": 5.77430814429458e-06, "loss": 1.5405, "step": 43580 }, { "epoch": 1.2682898449544755, "grad_norm": 13.5625, "learning_rate": 5.772368862621965e-06, "loss": 1.5249, "step": 43600 }, { "epoch": 1.2688716292870232, "grad_norm": 15.5, "learning_rate": 5.77042958094935e-06, "loss": 1.499, "step": 43620 }, { "epoch": 1.2694534136195712, "grad_norm": 13.0625, "learning_rate": 5.768490299276735e-06, "loss": 1.4628, "step": 43640 }, { "epoch": 1.2700351979521192, "grad_norm": 13.125, "learning_rate": 5.7665510176041205e-06, "loss": 1.464, "step": 43660 }, { "epoch": 1.2706169822846671, "grad_norm": 11.375, "learning_rate": 5.764611735931506e-06, "loss": 1.5615, "step": 43680 }, { "epoch": 1.271198766617215, "grad_norm": 13.625, "learning_rate": 5.762672454258891e-06, "loss": 1.5503, "step": 43700 }, { "epoch": 1.2717805509497628, "grad_norm": 11.25, "learning_rate": 5.760733172586276e-06, "loss": 1.3915, "step": 43720 }, { "epoch": 1.2723623352823108, "grad_norm": 13.625, "learning_rate": 5.758793890913661e-06, "loss": 1.6215, "step": 43740 }, { "epoch": 1.2729441196148588, "grad_norm": 16.5, "learning_rate": 5.756854609241046e-06, "loss": 1.5395, "step": 43760 }, { "epoch": 1.2735259039474067, "grad_norm": 17.125, "learning_rate": 5.754915327568431e-06, "loss": 1.5404, "step": 43780 }, { "epoch": 1.2741076882799547, "grad_norm": 13.5, "learning_rate": 5.752976045895816e-06, "loss": 1.4823, "step": 43800 }, { "epoch": 1.2746894726125024, "grad_norm": 12.0625, "learning_rate": 5.751036764223201e-06, "loss": 1.5202, "step": 43820 }, { "epoch": 1.2752712569450504, "grad_norm": 14.875, "learning_rate": 5.749097482550586e-06, "loss": 1.5736, "step": 43840 }, { "epoch": 1.2758530412775984, "grad_norm": 12.5625, "learning_rate": 5.7471582008779715e-06, "loss": 1.4768, "step": 43860 }, { "epoch": 1.2764348256101463, "grad_norm": 16.875, "learning_rate": 5.7452189192053566e-06, "loss": 1.5277, "step": 43880 }, { "epoch": 1.2770166099426943, "grad_norm": 11.0625, "learning_rate": 5.743279637532742e-06, "loss": 1.4631, "step": 43900 }, { "epoch": 1.2775983942752422, "grad_norm": 14.875, "learning_rate": 5.741340355860127e-06, "loss": 1.5205, "step": 43920 }, { "epoch": 1.2781801786077902, "grad_norm": 13.0, "learning_rate": 5.739401074187512e-06, "loss": 1.5229, "step": 43940 }, { "epoch": 1.278761962940338, "grad_norm": 12.5, "learning_rate": 5.737461792514897e-06, "loss": 1.5677, "step": 43960 }, { "epoch": 1.279343747272886, "grad_norm": 13.5, "learning_rate": 5.735522510842282e-06, "loss": 1.4604, "step": 43980 }, { "epoch": 1.2799255316054339, "grad_norm": 13.125, "learning_rate": 5.733583229169667e-06, "loss": 1.5695, "step": 44000 }, { "epoch": 1.2805073159379818, "grad_norm": 14.375, "learning_rate": 5.731643947497052e-06, "loss": 1.5317, "step": 44020 }, { "epoch": 1.2810891002705298, "grad_norm": 14.5, "learning_rate": 5.729704665824437e-06, "loss": 1.5426, "step": 44040 }, { "epoch": 1.2816708846030775, "grad_norm": 15.5625, "learning_rate": 5.7277653841518224e-06, "loss": 1.6236, "step": 44060 }, { "epoch": 1.2822526689356255, "grad_norm": 12.4375, "learning_rate": 5.7258261024792075e-06, "loss": 1.5514, "step": 44080 }, { "epoch": 1.2828344532681735, "grad_norm": 13.4375, "learning_rate": 5.723886820806593e-06, "loss": 1.5181, "step": 44100 }, { "epoch": 1.2834162376007214, "grad_norm": 13.3125, "learning_rate": 5.721947539133978e-06, "loss": 1.5044, "step": 44120 }, { "epoch": 1.2839980219332694, "grad_norm": 11.75, "learning_rate": 5.720008257461363e-06, "loss": 1.5696, "step": 44140 }, { "epoch": 1.2845798062658171, "grad_norm": 14.1875, "learning_rate": 5.718068975788748e-06, "loss": 1.4719, "step": 44160 }, { "epoch": 1.2851615905983653, "grad_norm": 13.375, "learning_rate": 5.716129694116133e-06, "loss": 1.4986, "step": 44180 }, { "epoch": 1.285743374930913, "grad_norm": 12.8125, "learning_rate": 5.714190412443517e-06, "loss": 1.5602, "step": 44200 }, { "epoch": 1.286325159263461, "grad_norm": 10.875, "learning_rate": 5.712251130770902e-06, "loss": 1.5185, "step": 44220 }, { "epoch": 1.286906943596009, "grad_norm": 12.0625, "learning_rate": 5.7103118490982875e-06, "loss": 1.4907, "step": 44240 }, { "epoch": 1.287488727928557, "grad_norm": 12.0625, "learning_rate": 5.7083725674256726e-06, "loss": 1.4145, "step": 44260 }, { "epoch": 1.288070512261105, "grad_norm": 14.375, "learning_rate": 5.706433285753058e-06, "loss": 1.5275, "step": 44280 }, { "epoch": 1.2886522965936527, "grad_norm": 15.3125, "learning_rate": 5.704494004080443e-06, "loss": 1.5402, "step": 44300 }, { "epoch": 1.2892340809262006, "grad_norm": 12.375, "learning_rate": 5.702554722407828e-06, "loss": 1.6014, "step": 44320 }, { "epoch": 1.2898158652587486, "grad_norm": 13.75, "learning_rate": 5.700615440735213e-06, "loss": 1.6052, "step": 44340 }, { "epoch": 1.2903976495912965, "grad_norm": 12.5625, "learning_rate": 5.698676159062598e-06, "loss": 1.4923, "step": 44360 }, { "epoch": 1.2909794339238445, "grad_norm": 11.5625, "learning_rate": 5.696736877389983e-06, "loss": 1.4916, "step": 44380 }, { "epoch": 1.2915612182563923, "grad_norm": 13.4375, "learning_rate": 5.694797595717368e-06, "loss": 1.5162, "step": 44400 }, { "epoch": 1.2921430025889402, "grad_norm": 11.875, "learning_rate": 5.692858314044753e-06, "loss": 1.6063, "step": 44420 }, { "epoch": 1.2927247869214882, "grad_norm": 11.6875, "learning_rate": 5.6909190323721384e-06, "loss": 1.6026, "step": 44440 }, { "epoch": 1.2933065712540361, "grad_norm": 12.3125, "learning_rate": 5.6889797506995235e-06, "loss": 1.3727, "step": 44460 }, { "epoch": 1.293888355586584, "grad_norm": 12.4375, "learning_rate": 5.687040469026909e-06, "loss": 1.517, "step": 44480 }, { "epoch": 1.2944701399191318, "grad_norm": 11.3125, "learning_rate": 5.685101187354294e-06, "loss": 1.4526, "step": 44500 }, { "epoch": 1.29505192425168, "grad_norm": 17.375, "learning_rate": 5.683161905681679e-06, "loss": 1.6621, "step": 44520 }, { "epoch": 1.2956337085842278, "grad_norm": 14.9375, "learning_rate": 5.681222624009064e-06, "loss": 1.5943, "step": 44540 }, { "epoch": 1.2962154929167757, "grad_norm": 12.4375, "learning_rate": 5.679283342336449e-06, "loss": 1.5046, "step": 44560 }, { "epoch": 1.2967972772493237, "grad_norm": 14.75, "learning_rate": 5.677344060663834e-06, "loss": 1.5444, "step": 44580 }, { "epoch": 1.2973790615818717, "grad_norm": 13.8125, "learning_rate": 5.675404778991219e-06, "loss": 1.5583, "step": 44600 }, { "epoch": 1.2979608459144196, "grad_norm": 14.625, "learning_rate": 5.673465497318604e-06, "loss": 1.4507, "step": 44620 }, { "epoch": 1.2985426302469674, "grad_norm": 11.0625, "learning_rate": 5.671526215645989e-06, "loss": 1.5596, "step": 44640 }, { "epoch": 1.2991244145795153, "grad_norm": 13.25, "learning_rate": 5.6695869339733745e-06, "loss": 1.5583, "step": 44660 }, { "epoch": 1.2997061989120633, "grad_norm": 38.75, "learning_rate": 5.66764765230076e-06, "loss": 1.5247, "step": 44680 }, { "epoch": 1.3002879832446113, "grad_norm": 7.75, "learning_rate": 5.665708370628145e-06, "loss": 1.3917, "step": 44700 }, { "epoch": 1.3008697675771592, "grad_norm": 15.0625, "learning_rate": 5.66376908895553e-06, "loss": 1.5193, "step": 44720 }, { "epoch": 1.301451551909707, "grad_norm": 13.3125, "learning_rate": 5.661829807282915e-06, "loss": 1.5081, "step": 44740 }, { "epoch": 1.302033336242255, "grad_norm": 14.75, "learning_rate": 5.6598905256103e-06, "loss": 1.4606, "step": 44760 }, { "epoch": 1.3026151205748029, "grad_norm": 11.4375, "learning_rate": 5.657951243937685e-06, "loss": 1.5302, "step": 44780 }, { "epoch": 1.3031969049073509, "grad_norm": 10.875, "learning_rate": 5.65601196226507e-06, "loss": 1.53, "step": 44800 }, { "epoch": 1.3037786892398988, "grad_norm": 12.6875, "learning_rate": 5.654072680592455e-06, "loss": 1.5268, "step": 44820 }, { "epoch": 1.3043604735724468, "grad_norm": 12.625, "learning_rate": 5.65213339891984e-06, "loss": 1.5246, "step": 44840 }, { "epoch": 1.3049422579049947, "grad_norm": 13.9375, "learning_rate": 5.6501941172472255e-06, "loss": 1.5355, "step": 44860 }, { "epoch": 1.3055240422375425, "grad_norm": 12.4375, "learning_rate": 5.648254835574611e-06, "loss": 1.5432, "step": 44880 }, { "epoch": 1.3061058265700904, "grad_norm": 13.3125, "learning_rate": 5.646315553901996e-06, "loss": 1.5031, "step": 44900 }, { "epoch": 1.3066876109026384, "grad_norm": 12.75, "learning_rate": 5.644376272229381e-06, "loss": 1.4694, "step": 44920 }, { "epoch": 1.3072693952351864, "grad_norm": 13.5, "learning_rate": 5.642436990556766e-06, "loss": 1.5157, "step": 44940 }, { "epoch": 1.3078511795677343, "grad_norm": 12.0625, "learning_rate": 5.640497708884151e-06, "loss": 1.461, "step": 44960 }, { "epoch": 1.308432963900282, "grad_norm": 12.6875, "learning_rate": 5.638558427211536e-06, "loss": 1.5395, "step": 44980 }, { "epoch": 1.30901474823283, "grad_norm": 12.0, "learning_rate": 5.636619145538921e-06, "loss": 1.5547, "step": 45000 }, { "epoch": 1.309596532565378, "grad_norm": 10.1875, "learning_rate": 5.634679863866305e-06, "loss": 1.5409, "step": 45020 }, { "epoch": 1.310178316897926, "grad_norm": 13.4375, "learning_rate": 5.6327405821936905e-06, "loss": 1.4595, "step": 45040 }, { "epoch": 1.310760101230474, "grad_norm": 11.3125, "learning_rate": 5.630801300521076e-06, "loss": 1.4294, "step": 45060 }, { "epoch": 1.3113418855630217, "grad_norm": 15.625, "learning_rate": 5.628862018848461e-06, "loss": 1.5729, "step": 45080 }, { "epoch": 1.3119236698955696, "grad_norm": 12.875, "learning_rate": 5.626922737175846e-06, "loss": 1.4949, "step": 45100 }, { "epoch": 1.3125054542281176, "grad_norm": 15.4375, "learning_rate": 5.624983455503231e-06, "loss": 1.5148, "step": 45120 }, { "epoch": 1.3130872385606656, "grad_norm": 12.5, "learning_rate": 5.623044173830616e-06, "loss": 1.5115, "step": 45140 }, { "epoch": 1.3136690228932135, "grad_norm": 14.25, "learning_rate": 5.621104892158001e-06, "loss": 1.4664, "step": 45160 }, { "epoch": 1.3142508072257615, "grad_norm": 10.4375, "learning_rate": 5.619165610485386e-06, "loss": 1.5492, "step": 45180 }, { "epoch": 1.3148325915583094, "grad_norm": 11.625, "learning_rate": 5.617226328812771e-06, "loss": 1.4522, "step": 45200 }, { "epoch": 1.3154143758908572, "grad_norm": 12.1875, "learning_rate": 5.615287047140156e-06, "loss": 1.582, "step": 45220 }, { "epoch": 1.3159961602234052, "grad_norm": 14.0625, "learning_rate": 5.6133477654675415e-06, "loss": 1.4966, "step": 45240 }, { "epoch": 1.3165779445559531, "grad_norm": 11.9375, "learning_rate": 5.611408483794927e-06, "loss": 1.5052, "step": 45260 }, { "epoch": 1.317159728888501, "grad_norm": 11.9375, "learning_rate": 5.609469202122312e-06, "loss": 1.558, "step": 45280 }, { "epoch": 1.317741513221049, "grad_norm": 12.9375, "learning_rate": 5.607529920449697e-06, "loss": 1.4795, "step": 45300 }, { "epoch": 1.3183232975535968, "grad_norm": 11.8125, "learning_rate": 5.605590638777082e-06, "loss": 1.4875, "step": 45320 }, { "epoch": 1.3189050818861447, "grad_norm": 14.0625, "learning_rate": 5.603651357104467e-06, "loss": 1.4081, "step": 45340 }, { "epoch": 1.3194868662186927, "grad_norm": 13.0625, "learning_rate": 5.601712075431852e-06, "loss": 1.6053, "step": 45360 }, { "epoch": 1.3200686505512407, "grad_norm": 17.0, "learning_rate": 5.599772793759237e-06, "loss": 1.5608, "step": 45380 }, { "epoch": 1.3206504348837886, "grad_norm": 12.375, "learning_rate": 5.597833512086622e-06, "loss": 1.5341, "step": 45400 }, { "epoch": 1.3212322192163364, "grad_norm": 15.6875, "learning_rate": 5.595894230414007e-06, "loss": 1.5038, "step": 45420 }, { "epoch": 1.3218140035488846, "grad_norm": 13.6875, "learning_rate": 5.5939549487413925e-06, "loss": 1.4798, "step": 45440 }, { "epoch": 1.3223957878814323, "grad_norm": 14.375, "learning_rate": 5.5920156670687776e-06, "loss": 1.5253, "step": 45460 }, { "epoch": 1.3229775722139803, "grad_norm": 16.125, "learning_rate": 5.590076385396163e-06, "loss": 1.4623, "step": 45480 }, { "epoch": 1.3235593565465282, "grad_norm": 12.0, "learning_rate": 5.588137103723548e-06, "loss": 1.5533, "step": 45500 }, { "epoch": 1.3241411408790762, "grad_norm": 11.375, "learning_rate": 5.586197822050933e-06, "loss": 1.5577, "step": 45520 }, { "epoch": 1.3247229252116242, "grad_norm": 11.125, "learning_rate": 5.584258540378318e-06, "loss": 1.5427, "step": 45540 }, { "epoch": 1.325304709544172, "grad_norm": 13.9375, "learning_rate": 5.582319258705703e-06, "loss": 1.5284, "step": 45560 }, { "epoch": 1.3258864938767199, "grad_norm": 12.375, "learning_rate": 5.580379977033088e-06, "loss": 1.5029, "step": 45580 }, { "epoch": 1.3264682782092678, "grad_norm": 12.1875, "learning_rate": 5.578440695360473e-06, "loss": 1.5527, "step": 45600 }, { "epoch": 1.3270500625418158, "grad_norm": 12.6875, "learning_rate": 5.576501413687858e-06, "loss": 1.5544, "step": 45620 }, { "epoch": 1.3276318468743638, "grad_norm": 14.1875, "learning_rate": 5.5745621320152434e-06, "loss": 1.5591, "step": 45640 }, { "epoch": 1.3282136312069115, "grad_norm": 12.8125, "learning_rate": 5.5726228503426285e-06, "loss": 1.4167, "step": 45660 }, { "epoch": 1.3287954155394595, "grad_norm": 13.375, "learning_rate": 5.570683568670014e-06, "loss": 1.4603, "step": 45680 }, { "epoch": 1.3293771998720074, "grad_norm": 15.0625, "learning_rate": 5.568744286997399e-06, "loss": 1.5703, "step": 45700 }, { "epoch": 1.3299589842045554, "grad_norm": 14.6875, "learning_rate": 5.566805005324784e-06, "loss": 1.5153, "step": 45720 }, { "epoch": 1.3305407685371033, "grad_norm": 14.375, "learning_rate": 5.564865723652169e-06, "loss": 1.4395, "step": 45740 }, { "epoch": 1.331122552869651, "grad_norm": 14.875, "learning_rate": 5.562926441979554e-06, "loss": 1.394, "step": 45760 }, { "epoch": 1.3317043372021993, "grad_norm": 13.5625, "learning_rate": 5.560987160306939e-06, "loss": 1.501, "step": 45780 }, { "epoch": 1.332286121534747, "grad_norm": 12.8125, "learning_rate": 5.559047878634324e-06, "loss": 1.4852, "step": 45800 }, { "epoch": 1.332867905867295, "grad_norm": 12.25, "learning_rate": 5.557108596961709e-06, "loss": 1.5356, "step": 45820 }, { "epoch": 1.333449690199843, "grad_norm": 13.25, "learning_rate": 5.5551693152890936e-06, "loss": 1.4918, "step": 45840 }, { "epoch": 1.334031474532391, "grad_norm": 15.5, "learning_rate": 5.553230033616479e-06, "loss": 1.4625, "step": 45860 }, { "epoch": 1.3346132588649389, "grad_norm": 13.0, "learning_rate": 5.551290751943864e-06, "loss": 1.4486, "step": 45880 }, { "epoch": 1.3351950431974866, "grad_norm": 12.1875, "learning_rate": 5.549351470271249e-06, "loss": 1.4094, "step": 45900 }, { "epoch": 1.3357768275300346, "grad_norm": 12.875, "learning_rate": 5.547412188598634e-06, "loss": 1.5693, "step": 45920 }, { "epoch": 1.3363586118625825, "grad_norm": 11.0625, "learning_rate": 5.545472906926019e-06, "loss": 1.5614, "step": 45940 }, { "epoch": 1.3369403961951305, "grad_norm": 14.25, "learning_rate": 5.543533625253404e-06, "loss": 1.5938, "step": 45960 }, { "epoch": 1.3375221805276785, "grad_norm": 11.625, "learning_rate": 5.541594343580789e-06, "loss": 1.4579, "step": 45980 }, { "epoch": 1.3381039648602262, "grad_norm": 14.875, "learning_rate": 5.539655061908174e-06, "loss": 1.5772, "step": 46000 }, { "epoch": 1.3386857491927742, "grad_norm": 14.0, "learning_rate": 5.5377157802355594e-06, "loss": 1.6044, "step": 46020 }, { "epoch": 1.3392675335253221, "grad_norm": 13.6875, "learning_rate": 5.535776498562944e-06, "loss": 1.5251, "step": 46040 }, { "epoch": 1.33984931785787, "grad_norm": 13.125, "learning_rate": 5.533837216890329e-06, "loss": 1.5302, "step": 46060 }, { "epoch": 1.340431102190418, "grad_norm": 11.9375, "learning_rate": 5.531897935217714e-06, "loss": 1.5406, "step": 46080 }, { "epoch": 1.341012886522966, "grad_norm": 12.5, "learning_rate": 5.529958653545099e-06, "loss": 1.5536, "step": 46100 }, { "epoch": 1.341594670855514, "grad_norm": 17.875, "learning_rate": 5.528019371872484e-06, "loss": 1.5169, "step": 46120 }, { "epoch": 1.3421764551880617, "grad_norm": 16.125, "learning_rate": 5.526080090199869e-06, "loss": 1.5308, "step": 46140 }, { "epoch": 1.3427582395206097, "grad_norm": 11.3125, "learning_rate": 5.524140808527254e-06, "loss": 1.61, "step": 46160 }, { "epoch": 1.3433400238531576, "grad_norm": 13.875, "learning_rate": 5.522201526854639e-06, "loss": 1.5143, "step": 46180 }, { "epoch": 1.3439218081857056, "grad_norm": 13.5, "learning_rate": 5.5202622451820245e-06, "loss": 1.5182, "step": 46200 }, { "epoch": 1.3445035925182536, "grad_norm": 13.25, "learning_rate": 5.5183229635094096e-06, "loss": 1.49, "step": 46220 }, { "epoch": 1.3450853768508013, "grad_norm": 13.75, "learning_rate": 5.516383681836795e-06, "loss": 1.4638, "step": 46240 }, { "epoch": 1.3456671611833493, "grad_norm": 15.5, "learning_rate": 5.51444440016418e-06, "loss": 1.4815, "step": 46260 }, { "epoch": 1.3462489455158972, "grad_norm": 13.0, "learning_rate": 5.512505118491565e-06, "loss": 1.4858, "step": 46280 }, { "epoch": 1.3468307298484452, "grad_norm": 15.25, "learning_rate": 5.51056583681895e-06, "loss": 1.5412, "step": 46300 }, { "epoch": 1.3474125141809932, "grad_norm": 13.375, "learning_rate": 5.508626555146335e-06, "loss": 1.5732, "step": 46320 }, { "epoch": 1.347994298513541, "grad_norm": 14.1875, "learning_rate": 5.50668727347372e-06, "loss": 1.548, "step": 46340 }, { "epoch": 1.3485760828460889, "grad_norm": 13.8125, "learning_rate": 5.504747991801105e-06, "loss": 1.4905, "step": 46360 }, { "epoch": 1.3491578671786368, "grad_norm": 14.25, "learning_rate": 5.50280871012849e-06, "loss": 1.4526, "step": 46380 }, { "epoch": 1.3497396515111848, "grad_norm": 11.0, "learning_rate": 5.500869428455875e-06, "loss": 1.4639, "step": 46400 }, { "epoch": 1.3503214358437328, "grad_norm": 13.5625, "learning_rate": 5.49893014678326e-06, "loss": 1.5546, "step": 46420 }, { "epoch": 1.3509032201762807, "grad_norm": 13.8125, "learning_rate": 5.496990865110645e-06, "loss": 1.5003, "step": 46440 }, { "epoch": 1.3514850045088287, "grad_norm": 13.3125, "learning_rate": 5.49505158343803e-06, "loss": 1.5832, "step": 46460 }, { "epoch": 1.3520667888413764, "grad_norm": 13.75, "learning_rate": 5.493112301765415e-06, "loss": 1.5994, "step": 46480 }, { "epoch": 1.3526485731739244, "grad_norm": 13.625, "learning_rate": 5.4911730200928e-06, "loss": 1.4468, "step": 46500 }, { "epoch": 1.3532303575064724, "grad_norm": 14.9375, "learning_rate": 5.489233738420185e-06, "loss": 1.4975, "step": 46520 }, { "epoch": 1.3538121418390203, "grad_norm": 12.375, "learning_rate": 5.48729445674757e-06, "loss": 1.4866, "step": 46540 }, { "epoch": 1.3543939261715683, "grad_norm": 13.375, "learning_rate": 5.485355175074955e-06, "loss": 1.5129, "step": 46560 }, { "epoch": 1.354975710504116, "grad_norm": 12.9375, "learning_rate": 5.4834158934023405e-06, "loss": 1.5391, "step": 46580 }, { "epoch": 1.355557494836664, "grad_norm": 12.8125, "learning_rate": 5.4814766117297256e-06, "loss": 1.5308, "step": 46600 }, { "epoch": 1.356139279169212, "grad_norm": 13.25, "learning_rate": 5.479537330057111e-06, "loss": 1.5014, "step": 46620 }, { "epoch": 1.35672106350176, "grad_norm": 13.5, "learning_rate": 5.477598048384496e-06, "loss": 1.5595, "step": 46640 }, { "epoch": 1.3573028478343079, "grad_norm": 13.5, "learning_rate": 5.475658766711881e-06, "loss": 1.4568, "step": 46660 }, { "epoch": 1.3578846321668556, "grad_norm": 13.3125, "learning_rate": 5.473719485039266e-06, "loss": 1.5577, "step": 46680 }, { "epoch": 1.3584664164994038, "grad_norm": 12.6875, "learning_rate": 5.471780203366651e-06, "loss": 1.5837, "step": 46700 }, { "epoch": 1.3590482008319515, "grad_norm": 12.6875, "learning_rate": 5.469840921694036e-06, "loss": 1.5158, "step": 46720 }, { "epoch": 1.3596299851644995, "grad_norm": 12.4375, "learning_rate": 5.467901640021421e-06, "loss": 1.5101, "step": 46740 }, { "epoch": 1.3602117694970475, "grad_norm": 12.75, "learning_rate": 5.465962358348806e-06, "loss": 1.488, "step": 46760 }, { "epoch": 1.3607935538295954, "grad_norm": 22.5, "learning_rate": 5.4640230766761915e-06, "loss": 1.5384, "step": 46780 }, { "epoch": 1.3613753381621434, "grad_norm": 13.0625, "learning_rate": 5.4620837950035766e-06, "loss": 1.5471, "step": 46800 }, { "epoch": 1.3619571224946911, "grad_norm": 14.625, "learning_rate": 5.460144513330962e-06, "loss": 1.5273, "step": 46820 }, { "epoch": 1.362538906827239, "grad_norm": 13.6875, "learning_rate": 5.458205231658347e-06, "loss": 1.5492, "step": 46840 }, { "epoch": 1.363120691159787, "grad_norm": 11.625, "learning_rate": 5.456265949985732e-06, "loss": 1.4581, "step": 46860 }, { "epoch": 1.363702475492335, "grad_norm": 14.6875, "learning_rate": 5.454326668313117e-06, "loss": 1.4906, "step": 46880 }, { "epoch": 1.364284259824883, "grad_norm": 14.6875, "learning_rate": 5.452387386640502e-06, "loss": 1.399, "step": 46900 }, { "epoch": 1.3648660441574307, "grad_norm": 13.0, "learning_rate": 5.450448104967887e-06, "loss": 1.4955, "step": 46920 }, { "epoch": 1.3654478284899787, "grad_norm": 13.0625, "learning_rate": 5.448508823295272e-06, "loss": 1.5175, "step": 46940 }, { "epoch": 1.3660296128225267, "grad_norm": 13.0, "learning_rate": 5.446569541622657e-06, "loss": 1.5245, "step": 46960 }, { "epoch": 1.3666113971550746, "grad_norm": 13.0, "learning_rate": 5.4446302599500424e-06, "loss": 1.6267, "step": 46980 }, { "epoch": 1.3671931814876226, "grad_norm": 12.5625, "learning_rate": 5.4426909782774275e-06, "loss": 1.5281, "step": 47000 }, { "epoch": 1.3677749658201703, "grad_norm": 13.375, "learning_rate": 5.440751696604813e-06, "loss": 1.5513, "step": 47020 }, { "epoch": 1.3683567501527185, "grad_norm": 14.125, "learning_rate": 5.438812414932198e-06, "loss": 1.483, "step": 47040 }, { "epoch": 1.3689385344852663, "grad_norm": 14.8125, "learning_rate": 5.436873133259583e-06, "loss": 1.4988, "step": 47060 }, { "epoch": 1.3695203188178142, "grad_norm": 12.0625, "learning_rate": 5.434933851586968e-06, "loss": 1.532, "step": 47080 }, { "epoch": 1.3701021031503622, "grad_norm": 11.125, "learning_rate": 5.432994569914353e-06, "loss": 1.4573, "step": 47100 }, { "epoch": 1.3706838874829101, "grad_norm": 13.9375, "learning_rate": 5.431055288241738e-06, "loss": 1.494, "step": 47120 }, { "epoch": 1.371265671815458, "grad_norm": 15.125, "learning_rate": 5.429116006569123e-06, "loss": 1.4989, "step": 47140 }, { "epoch": 1.3718474561480059, "grad_norm": 12.375, "learning_rate": 5.427176724896508e-06, "loss": 1.5388, "step": 47160 }, { "epoch": 1.3724292404805538, "grad_norm": 14.625, "learning_rate": 5.425237443223893e-06, "loss": 1.4658, "step": 47180 }, { "epoch": 1.3730110248131018, "grad_norm": 14.1875, "learning_rate": 5.4232981615512785e-06, "loss": 1.4219, "step": 47200 }, { "epoch": 1.3735928091456497, "grad_norm": 15.875, "learning_rate": 5.421358879878663e-06, "loss": 1.555, "step": 47220 }, { "epoch": 1.3741745934781977, "grad_norm": 17.25, "learning_rate": 5.419419598206048e-06, "loss": 1.4773, "step": 47240 }, { "epoch": 1.3747563778107454, "grad_norm": 13.9375, "learning_rate": 5.417480316533433e-06, "loss": 1.5697, "step": 47260 }, { "epoch": 1.3753381621432934, "grad_norm": 15.125, "learning_rate": 5.415541034860818e-06, "loss": 1.564, "step": 47280 }, { "epoch": 1.3759199464758414, "grad_norm": 15.4375, "learning_rate": 5.413601753188203e-06, "loss": 1.4945, "step": 47300 }, { "epoch": 1.3765017308083893, "grad_norm": 13.9375, "learning_rate": 5.411662471515588e-06, "loss": 1.4841, "step": 47320 }, { "epoch": 1.3770835151409373, "grad_norm": 12.9375, "learning_rate": 5.409723189842973e-06, "loss": 1.512, "step": 47340 }, { "epoch": 1.3776652994734853, "grad_norm": 12.125, "learning_rate": 5.4077839081703584e-06, "loss": 1.4335, "step": 47360 }, { "epoch": 1.3782470838060332, "grad_norm": 11.9375, "learning_rate": 5.4058446264977435e-06, "loss": 1.5114, "step": 47380 }, { "epoch": 1.378828868138581, "grad_norm": 10.9375, "learning_rate": 5.403905344825129e-06, "loss": 1.452, "step": 47400 }, { "epoch": 1.379410652471129, "grad_norm": 11.375, "learning_rate": 5.401966063152514e-06, "loss": 1.5196, "step": 47420 }, { "epoch": 1.379992436803677, "grad_norm": 12.25, "learning_rate": 5.400026781479899e-06, "loss": 1.6073, "step": 47440 }, { "epoch": 1.3805742211362249, "grad_norm": 11.1875, "learning_rate": 5.398087499807284e-06, "loss": 1.5454, "step": 47460 }, { "epoch": 1.3811560054687728, "grad_norm": 11.5625, "learning_rate": 5.396148218134669e-06, "loss": 1.5641, "step": 47480 }, { "epoch": 1.3817377898013206, "grad_norm": 14.0, "learning_rate": 5.394208936462054e-06, "loss": 1.6132, "step": 47500 }, { "epoch": 1.3823195741338685, "grad_norm": 13.0, "learning_rate": 5.392269654789439e-06, "loss": 1.4819, "step": 47520 }, { "epoch": 1.3829013584664165, "grad_norm": 13.125, "learning_rate": 5.390330373116824e-06, "loss": 1.6146, "step": 47540 }, { "epoch": 1.3834831427989644, "grad_norm": 13.875, "learning_rate": 5.388391091444209e-06, "loss": 1.5331, "step": 47560 }, { "epoch": 1.3840649271315124, "grad_norm": 12.625, "learning_rate": 5.3864518097715945e-06, "loss": 1.6131, "step": 47580 }, { "epoch": 1.3846467114640602, "grad_norm": 12.1875, "learning_rate": 5.38451252809898e-06, "loss": 1.4784, "step": 47600 }, { "epoch": 1.3852284957966081, "grad_norm": 14.3125, "learning_rate": 5.382573246426365e-06, "loss": 1.5693, "step": 47620 }, { "epoch": 1.385810280129156, "grad_norm": 13.6875, "learning_rate": 5.38063396475375e-06, "loss": 1.497, "step": 47640 }, { "epoch": 1.386392064461704, "grad_norm": 12.875, "learning_rate": 5.378694683081135e-06, "loss": 1.4902, "step": 47660 }, { "epoch": 1.386973848794252, "grad_norm": 13.875, "learning_rate": 5.37675540140852e-06, "loss": 1.4721, "step": 47680 }, { "epoch": 1.3875556331268, "grad_norm": 13.3125, "learning_rate": 5.374816119735905e-06, "loss": 1.4737, "step": 47700 }, { "epoch": 1.388137417459348, "grad_norm": 12.125, "learning_rate": 5.37287683806329e-06, "loss": 1.5667, "step": 47720 }, { "epoch": 1.3887192017918957, "grad_norm": 11.9375, "learning_rate": 5.370937556390675e-06, "loss": 1.5925, "step": 47740 }, { "epoch": 1.3893009861244436, "grad_norm": 15.0, "learning_rate": 5.36899827471806e-06, "loss": 1.5047, "step": 47760 }, { "epoch": 1.3898827704569916, "grad_norm": 13.375, "learning_rate": 5.3670589930454455e-06, "loss": 1.5228, "step": 47780 }, { "epoch": 1.3904645547895396, "grad_norm": 14.1875, "learning_rate": 5.365119711372831e-06, "loss": 1.4962, "step": 47800 }, { "epoch": 1.3910463391220875, "grad_norm": 13.5, "learning_rate": 5.363180429700216e-06, "loss": 1.5033, "step": 47820 }, { "epoch": 1.3916281234546353, "grad_norm": 15.0625, "learning_rate": 5.361241148027601e-06, "loss": 1.5003, "step": 47840 }, { "epoch": 1.3922099077871832, "grad_norm": 12.75, "learning_rate": 5.359301866354986e-06, "loss": 1.54, "step": 47860 }, { "epoch": 1.3927916921197312, "grad_norm": 12.9375, "learning_rate": 5.357362584682371e-06, "loss": 1.4891, "step": 47880 }, { "epoch": 1.3933734764522792, "grad_norm": 15.125, "learning_rate": 5.355423303009756e-06, "loss": 1.4649, "step": 47900 }, { "epoch": 1.3939552607848271, "grad_norm": 13.5625, "learning_rate": 5.353484021337141e-06, "loss": 1.5445, "step": 47920 }, { "epoch": 1.3945370451173749, "grad_norm": 13.125, "learning_rate": 5.351544739664526e-06, "loss": 1.5875, "step": 47940 }, { "epoch": 1.395118829449923, "grad_norm": 12.8125, "learning_rate": 5.349605457991911e-06, "loss": 1.5797, "step": 47960 }, { "epoch": 1.3957006137824708, "grad_norm": 12.625, "learning_rate": 5.3476661763192965e-06, "loss": 1.5652, "step": 47980 }, { "epoch": 1.3962823981150188, "grad_norm": 15.375, "learning_rate": 5.3457268946466816e-06, "loss": 1.5665, "step": 48000 }, { "epoch": 1.3968641824475667, "grad_norm": 16.375, "learning_rate": 5.343787612974067e-06, "loss": 1.4943, "step": 48020 }, { "epoch": 1.3974459667801147, "grad_norm": 11.5625, "learning_rate": 5.341848331301451e-06, "loss": 1.5187, "step": 48040 }, { "epoch": 1.3980277511126626, "grad_norm": 13.125, "learning_rate": 5.339909049628836e-06, "loss": 1.5284, "step": 48060 }, { "epoch": 1.3986095354452104, "grad_norm": 9.375, "learning_rate": 5.337969767956221e-06, "loss": 1.4807, "step": 48080 }, { "epoch": 1.3991913197777583, "grad_norm": 13.125, "learning_rate": 5.336030486283606e-06, "loss": 1.4139, "step": 48100 }, { "epoch": 1.3997731041103063, "grad_norm": 13.25, "learning_rate": 5.334091204610991e-06, "loss": 1.5286, "step": 48120 }, { "epoch": 1.4003548884428543, "grad_norm": 12.0, "learning_rate": 5.332151922938376e-06, "loss": 1.4749, "step": 48140 }, { "epoch": 1.4009366727754022, "grad_norm": 14.375, "learning_rate": 5.3302126412657615e-06, "loss": 1.5266, "step": 48160 }, { "epoch": 1.40151845710795, "grad_norm": 12.375, "learning_rate": 5.328273359593147e-06, "loss": 1.5522, "step": 48180 }, { "epoch": 1.402100241440498, "grad_norm": 15.375, "learning_rate": 5.326334077920532e-06, "loss": 1.502, "step": 48200 }, { "epoch": 1.402682025773046, "grad_norm": 13.0, "learning_rate": 5.324394796247917e-06, "loss": 1.4469, "step": 48220 }, { "epoch": 1.4032638101055939, "grad_norm": 12.0, "learning_rate": 5.322455514575302e-06, "loss": 1.6373, "step": 48240 }, { "epoch": 1.4038455944381418, "grad_norm": 12.9375, "learning_rate": 5.320516232902687e-06, "loss": 1.4664, "step": 48260 }, { "epoch": 1.4044273787706896, "grad_norm": 13.125, "learning_rate": 5.318576951230072e-06, "loss": 1.5461, "step": 48280 }, { "epoch": 1.4050091631032378, "grad_norm": 15.9375, "learning_rate": 5.316637669557457e-06, "loss": 1.5507, "step": 48300 }, { "epoch": 1.4055909474357855, "grad_norm": 13.375, "learning_rate": 5.314698387884842e-06, "loss": 1.5017, "step": 48320 }, { "epoch": 1.4061727317683335, "grad_norm": 14.6875, "learning_rate": 5.312759106212227e-06, "loss": 1.5485, "step": 48340 }, { "epoch": 1.4067545161008814, "grad_norm": 14.9375, "learning_rate": 5.3108198245396125e-06, "loss": 1.576, "step": 48360 }, { "epoch": 1.4073363004334294, "grad_norm": 14.9375, "learning_rate": 5.3088805428669976e-06, "loss": 1.5244, "step": 48380 }, { "epoch": 1.4079180847659774, "grad_norm": 15.6875, "learning_rate": 5.306941261194383e-06, "loss": 1.539, "step": 48400 }, { "epoch": 1.408499869098525, "grad_norm": 11.875, "learning_rate": 5.305001979521768e-06, "loss": 1.5648, "step": 48420 }, { "epoch": 1.409081653431073, "grad_norm": 12.75, "learning_rate": 5.303062697849153e-06, "loss": 1.5098, "step": 48440 }, { "epoch": 1.409663437763621, "grad_norm": 12.125, "learning_rate": 5.301123416176538e-06, "loss": 1.4494, "step": 48460 }, { "epoch": 1.410245222096169, "grad_norm": 13.0, "learning_rate": 5.299184134503923e-06, "loss": 1.5425, "step": 48480 }, { "epoch": 1.410827006428717, "grad_norm": 13.75, "learning_rate": 5.297244852831308e-06, "loss": 1.4474, "step": 48500 }, { "epoch": 1.4114087907612647, "grad_norm": 11.9375, "learning_rate": 5.295305571158693e-06, "loss": 1.4395, "step": 48520 }, { "epoch": 1.4119905750938126, "grad_norm": 13.8125, "learning_rate": 5.293366289486078e-06, "loss": 1.5865, "step": 48540 }, { "epoch": 1.4125723594263606, "grad_norm": 16.375, "learning_rate": 5.2914270078134634e-06, "loss": 1.4994, "step": 48560 }, { "epoch": 1.4131541437589086, "grad_norm": 11.3125, "learning_rate": 5.2894877261408485e-06, "loss": 1.4271, "step": 48580 }, { "epoch": 1.4137359280914565, "grad_norm": 11.6875, "learning_rate": 5.287548444468234e-06, "loss": 1.5099, "step": 48600 }, { "epoch": 1.4143177124240045, "grad_norm": 13.5, "learning_rate": 5.285609162795619e-06, "loss": 1.5398, "step": 48620 }, { "epoch": 1.4148994967565525, "grad_norm": 12.75, "learning_rate": 5.283669881123004e-06, "loss": 1.5669, "step": 48640 }, { "epoch": 1.4154812810891002, "grad_norm": 12.0, "learning_rate": 5.281730599450389e-06, "loss": 1.5473, "step": 48660 }, { "epoch": 1.4160630654216482, "grad_norm": 15.25, "learning_rate": 5.279791317777774e-06, "loss": 1.5169, "step": 48680 }, { "epoch": 1.4166448497541961, "grad_norm": 12.25, "learning_rate": 5.277852036105159e-06, "loss": 1.5115, "step": 48700 }, { "epoch": 1.417226634086744, "grad_norm": 13.75, "learning_rate": 5.275912754432544e-06, "loss": 1.5089, "step": 48720 }, { "epoch": 1.417808418419292, "grad_norm": 14.875, "learning_rate": 5.273973472759929e-06, "loss": 1.5885, "step": 48740 }, { "epoch": 1.4183902027518398, "grad_norm": 13.6875, "learning_rate": 5.272034191087314e-06, "loss": 1.5541, "step": 48760 }, { "epoch": 1.4189719870843878, "grad_norm": 11.0, "learning_rate": 5.2700949094146995e-06, "loss": 1.532, "step": 48780 }, { "epoch": 1.4195537714169357, "grad_norm": 13.625, "learning_rate": 5.268155627742085e-06, "loss": 1.5522, "step": 48800 }, { "epoch": 1.4201355557494837, "grad_norm": 11.8125, "learning_rate": 5.26621634606947e-06, "loss": 1.4616, "step": 48820 }, { "epoch": 1.4207173400820317, "grad_norm": 12.3125, "learning_rate": 5.264277064396855e-06, "loss": 1.5783, "step": 48840 }, { "epoch": 1.4212991244145794, "grad_norm": 12.125, "learning_rate": 5.262337782724239e-06, "loss": 1.4606, "step": 48860 }, { "epoch": 1.4218809087471274, "grad_norm": 13.0, "learning_rate": 5.260398501051624e-06, "loss": 1.5216, "step": 48880 }, { "epoch": 1.4224626930796753, "grad_norm": 11.5, "learning_rate": 5.258459219379009e-06, "loss": 1.552, "step": 48900 }, { "epoch": 1.4230444774122233, "grad_norm": 12.875, "learning_rate": 5.256519937706394e-06, "loss": 1.5649, "step": 48920 }, { "epoch": 1.4236262617447712, "grad_norm": 16.125, "learning_rate": 5.2545806560337794e-06, "loss": 1.4982, "step": 48940 }, { "epoch": 1.4242080460773192, "grad_norm": 12.1875, "learning_rate": 5.2526413743611645e-06, "loss": 1.5495, "step": 48960 }, { "epoch": 1.4247898304098672, "grad_norm": 12.1875, "learning_rate": 5.25070209268855e-06, "loss": 1.5104, "step": 48980 }, { "epoch": 1.425371614742415, "grad_norm": 11.9375, "learning_rate": 5.248762811015935e-06, "loss": 1.4486, "step": 49000 }, { "epoch": 1.4259533990749629, "grad_norm": 11.625, "learning_rate": 5.24682352934332e-06, "loss": 1.4121, "step": 49020 }, { "epoch": 1.4265351834075108, "grad_norm": 13.25, "learning_rate": 5.244884247670705e-06, "loss": 1.5312, "step": 49040 }, { "epoch": 1.4271169677400588, "grad_norm": 13.875, "learning_rate": 5.24294496599809e-06, "loss": 1.4839, "step": 49060 }, { "epoch": 1.4276987520726068, "grad_norm": 14.625, "learning_rate": 5.241005684325475e-06, "loss": 1.5612, "step": 49080 }, { "epoch": 1.4282805364051545, "grad_norm": 14.8125, "learning_rate": 5.23906640265286e-06, "loss": 1.5692, "step": 49100 }, { "epoch": 1.4288623207377025, "grad_norm": 15.125, "learning_rate": 5.237127120980245e-06, "loss": 1.5126, "step": 49120 }, { "epoch": 1.4294441050702504, "grad_norm": 13.375, "learning_rate": 5.23518783930763e-06, "loss": 1.6138, "step": 49140 }, { "epoch": 1.4300258894027984, "grad_norm": 13.0, "learning_rate": 5.2332485576350155e-06, "loss": 1.4812, "step": 49160 }, { "epoch": 1.4306076737353464, "grad_norm": 13.5, "learning_rate": 5.231309275962401e-06, "loss": 1.5094, "step": 49180 }, { "epoch": 1.431189458067894, "grad_norm": 13.25, "learning_rate": 5.229369994289786e-06, "loss": 1.5418, "step": 49200 }, { "epoch": 1.4317712424004423, "grad_norm": 13.875, "learning_rate": 5.227430712617171e-06, "loss": 1.7129, "step": 49220 }, { "epoch": 1.43235302673299, "grad_norm": 12.5625, "learning_rate": 5.225491430944556e-06, "loss": 1.459, "step": 49240 }, { "epoch": 1.432934811065538, "grad_norm": 12.625, "learning_rate": 5.223552149271941e-06, "loss": 1.4942, "step": 49260 }, { "epoch": 1.433516595398086, "grad_norm": 12.3125, "learning_rate": 5.221612867599326e-06, "loss": 1.5423, "step": 49280 }, { "epoch": 1.434098379730634, "grad_norm": 11.75, "learning_rate": 5.219673585926711e-06, "loss": 1.4238, "step": 49300 }, { "epoch": 1.4346801640631819, "grad_norm": 13.0625, "learning_rate": 5.217734304254096e-06, "loss": 1.4657, "step": 49320 }, { "epoch": 1.4352619483957296, "grad_norm": 13.3125, "learning_rate": 5.215795022581481e-06, "loss": 1.4548, "step": 49340 }, { "epoch": 1.4358437327282776, "grad_norm": 17.125, "learning_rate": 5.2138557409088665e-06, "loss": 1.5447, "step": 49360 }, { "epoch": 1.4364255170608256, "grad_norm": 13.1875, "learning_rate": 5.211916459236252e-06, "loss": 1.485, "step": 49380 }, { "epoch": 1.4370073013933735, "grad_norm": 9.8125, "learning_rate": 5.209977177563637e-06, "loss": 1.59, "step": 49400 }, { "epoch": 1.4375890857259215, "grad_norm": 12.3125, "learning_rate": 5.208037895891022e-06, "loss": 1.4542, "step": 49420 }, { "epoch": 1.4381708700584692, "grad_norm": 13.875, "learning_rate": 5.206098614218407e-06, "loss": 1.5553, "step": 49440 }, { "epoch": 1.4387526543910172, "grad_norm": 13.0, "learning_rate": 5.204159332545792e-06, "loss": 1.5352, "step": 49460 }, { "epoch": 1.4393344387235651, "grad_norm": 12.75, "learning_rate": 5.202220050873177e-06, "loss": 1.498, "step": 49480 }, { "epoch": 1.439916223056113, "grad_norm": 12.0625, "learning_rate": 5.200280769200562e-06, "loss": 1.509, "step": 49500 }, { "epoch": 1.440498007388661, "grad_norm": 13.125, "learning_rate": 5.198341487527947e-06, "loss": 1.4639, "step": 49520 }, { "epoch": 1.4410797917212088, "grad_norm": 10.875, "learning_rate": 5.196402205855332e-06, "loss": 1.516, "step": 49540 }, { "epoch": 1.441661576053757, "grad_norm": 14.75, "learning_rate": 5.1944629241827175e-06, "loss": 1.5425, "step": 49560 }, { "epoch": 1.4422433603863047, "grad_norm": 12.5, "learning_rate": 5.1925236425101026e-06, "loss": 1.5151, "step": 49580 }, { "epoch": 1.4428251447188527, "grad_norm": 12.125, "learning_rate": 5.190584360837488e-06, "loss": 1.4453, "step": 49600 }, { "epoch": 1.4434069290514007, "grad_norm": 14.8125, "learning_rate": 5.188645079164873e-06, "loss": 1.5709, "step": 49620 }, { "epoch": 1.4439887133839486, "grad_norm": 11.3125, "learning_rate": 5.186705797492258e-06, "loss": 1.5566, "step": 49640 }, { "epoch": 1.4445704977164966, "grad_norm": 10.6875, "learning_rate": 5.184766515819642e-06, "loss": 1.5536, "step": 49660 }, { "epoch": 1.4451522820490443, "grad_norm": 13.375, "learning_rate": 5.182827234147027e-06, "loss": 1.57, "step": 49680 }, { "epoch": 1.4457340663815923, "grad_norm": 11.875, "learning_rate": 5.180887952474412e-06, "loss": 1.5094, "step": 49700 }, { "epoch": 1.4463158507141403, "grad_norm": 12.625, "learning_rate": 5.178948670801797e-06, "loss": 1.5041, "step": 49720 }, { "epoch": 1.4468976350466882, "grad_norm": 12.9375, "learning_rate": 5.1770093891291825e-06, "loss": 1.5361, "step": 49740 }, { "epoch": 1.4474794193792362, "grad_norm": 13.4375, "learning_rate": 5.175070107456568e-06, "loss": 1.51, "step": 49760 }, { "epoch": 1.448061203711784, "grad_norm": 12.8125, "learning_rate": 5.173130825783953e-06, "loss": 1.4887, "step": 49780 }, { "epoch": 1.448642988044332, "grad_norm": 12.8125, "learning_rate": 5.171191544111338e-06, "loss": 1.5794, "step": 49800 }, { "epoch": 1.4492247723768799, "grad_norm": 12.75, "learning_rate": 5.169252262438723e-06, "loss": 1.5677, "step": 49820 }, { "epoch": 1.4498065567094278, "grad_norm": 14.0, "learning_rate": 5.167312980766108e-06, "loss": 1.5516, "step": 49840 }, { "epoch": 1.4503883410419758, "grad_norm": 11.875, "learning_rate": 5.165373699093493e-06, "loss": 1.5398, "step": 49860 }, { "epoch": 1.4509701253745237, "grad_norm": 12.5, "learning_rate": 5.163434417420878e-06, "loss": 1.5098, "step": 49880 }, { "epoch": 1.4515519097070717, "grad_norm": 11.625, "learning_rate": 5.161495135748263e-06, "loss": 1.5095, "step": 49900 }, { "epoch": 1.4521336940396194, "grad_norm": 12.75, "learning_rate": 5.159555854075648e-06, "loss": 1.5098, "step": 49920 }, { "epoch": 1.4527154783721674, "grad_norm": 14.0, "learning_rate": 5.1576165724030335e-06, "loss": 1.4726, "step": 49940 }, { "epoch": 1.4532972627047154, "grad_norm": 15.6875, "learning_rate": 5.1556772907304186e-06, "loss": 1.5539, "step": 49960 }, { "epoch": 1.4538790470372633, "grad_norm": 13.0, "learning_rate": 5.153738009057804e-06, "loss": 1.5476, "step": 49980 }, { "epoch": 1.4544608313698113, "grad_norm": 10.6875, "learning_rate": 5.151798727385189e-06, "loss": 1.5702, "step": 50000 }, { "epoch": 1.455042615702359, "grad_norm": 12.9375, "learning_rate": 5.149859445712574e-06, "loss": 1.5263, "step": 50020 }, { "epoch": 1.455624400034907, "grad_norm": 12.5625, "learning_rate": 5.147920164039959e-06, "loss": 1.5251, "step": 50040 }, { "epoch": 1.456206184367455, "grad_norm": 11.1875, "learning_rate": 5.145980882367344e-06, "loss": 1.5967, "step": 50060 }, { "epoch": 1.456787968700003, "grad_norm": 13.0625, "learning_rate": 5.144041600694729e-06, "loss": 1.4547, "step": 50080 }, { "epoch": 1.457369753032551, "grad_norm": 12.875, "learning_rate": 5.142102319022114e-06, "loss": 1.5841, "step": 50100 }, { "epoch": 1.4579515373650986, "grad_norm": 10.875, "learning_rate": 5.140163037349499e-06, "loss": 1.5134, "step": 50120 }, { "epoch": 1.4585333216976466, "grad_norm": 17.625, "learning_rate": 5.1382237556768844e-06, "loss": 1.5292, "step": 50140 }, { "epoch": 1.4591151060301946, "grad_norm": 12.0625, "learning_rate": 5.1362844740042695e-06, "loss": 1.486, "step": 50160 }, { "epoch": 1.4596968903627425, "grad_norm": 17.25, "learning_rate": 5.134345192331655e-06, "loss": 1.5385, "step": 50180 }, { "epoch": 1.4602786746952905, "grad_norm": 10.0, "learning_rate": 5.13240591065904e-06, "loss": 1.5024, "step": 50200 }, { "epoch": 1.4608604590278385, "grad_norm": 13.75, "learning_rate": 5.130466628986425e-06, "loss": 1.529, "step": 50220 }, { "epoch": 1.4614422433603864, "grad_norm": 14.875, "learning_rate": 5.12852734731381e-06, "loss": 1.4843, "step": 50240 }, { "epoch": 1.4620240276929342, "grad_norm": 12.0625, "learning_rate": 5.126588065641195e-06, "loss": 1.506, "step": 50260 }, { "epoch": 1.4626058120254821, "grad_norm": 12.6875, "learning_rate": 5.12464878396858e-06, "loss": 1.4215, "step": 50280 }, { "epoch": 1.46318759635803, "grad_norm": 12.375, "learning_rate": 5.1227095022959635e-06, "loss": 1.4734, "step": 50300 }, { "epoch": 1.463769380690578, "grad_norm": 15.5, "learning_rate": 5.120770220623349e-06, "loss": 1.6313, "step": 50320 }, { "epoch": 1.464351165023126, "grad_norm": 13.4375, "learning_rate": 5.118830938950734e-06, "loss": 1.4654, "step": 50340 }, { "epoch": 1.4649329493556738, "grad_norm": 14.0, "learning_rate": 5.116891657278119e-06, "loss": 1.471, "step": 50360 }, { "epoch": 1.4655147336882217, "grad_norm": 11.0, "learning_rate": 5.114952375605504e-06, "loss": 1.5427, "step": 50380 }, { "epoch": 1.4660965180207697, "grad_norm": 14.625, "learning_rate": 5.113013093932889e-06, "loss": 1.572, "step": 50400 }, { "epoch": 1.4666783023533176, "grad_norm": 16.75, "learning_rate": 5.111073812260274e-06, "loss": 1.5277, "step": 50420 }, { "epoch": 1.4672600866858656, "grad_norm": 9.6875, "learning_rate": 5.109134530587659e-06, "loss": 1.471, "step": 50440 }, { "epoch": 1.4678418710184133, "grad_norm": 12.8125, "learning_rate": 5.107195248915044e-06, "loss": 1.5825, "step": 50460 }, { "epoch": 1.4684236553509615, "grad_norm": 14.0, "learning_rate": 5.105255967242429e-06, "loss": 1.5129, "step": 50480 }, { "epoch": 1.4690054396835093, "grad_norm": 12.9375, "learning_rate": 5.1033166855698145e-06, "loss": 1.4023, "step": 50500 }, { "epoch": 1.4695872240160572, "grad_norm": 13.375, "learning_rate": 5.1013774038972e-06, "loss": 1.5123, "step": 50520 }, { "epoch": 1.4701690083486052, "grad_norm": 11.6875, "learning_rate": 5.099438122224585e-06, "loss": 1.5481, "step": 50540 }, { "epoch": 1.4707507926811532, "grad_norm": 16.125, "learning_rate": 5.09749884055197e-06, "loss": 1.4437, "step": 50560 }, { "epoch": 1.4713325770137011, "grad_norm": 12.25, "learning_rate": 5.095559558879355e-06, "loss": 1.4811, "step": 50580 }, { "epoch": 1.4719143613462489, "grad_norm": 13.6875, "learning_rate": 5.09362027720674e-06, "loss": 1.5479, "step": 50600 }, { "epoch": 1.4724961456787968, "grad_norm": 14.375, "learning_rate": 5.091680995534125e-06, "loss": 1.5808, "step": 50620 }, { "epoch": 1.4730779300113448, "grad_norm": 13.0625, "learning_rate": 5.08974171386151e-06, "loss": 1.6277, "step": 50640 }, { "epoch": 1.4736597143438928, "grad_norm": 14.6875, "learning_rate": 5.087802432188895e-06, "loss": 1.5107, "step": 50660 }, { "epoch": 1.4742414986764407, "grad_norm": 14.5625, "learning_rate": 5.08586315051628e-06, "loss": 1.595, "step": 50680 }, { "epoch": 1.4748232830089885, "grad_norm": 13.9375, "learning_rate": 5.0839238688436655e-06, "loss": 1.5751, "step": 50700 }, { "epoch": 1.4754050673415364, "grad_norm": 13.125, "learning_rate": 5.0819845871710506e-06, "loss": 1.4475, "step": 50720 }, { "epoch": 1.4759868516740844, "grad_norm": 13.5625, "learning_rate": 5.080045305498436e-06, "loss": 1.5289, "step": 50740 }, { "epoch": 1.4765686360066324, "grad_norm": 15.9375, "learning_rate": 5.078106023825821e-06, "loss": 1.497, "step": 50760 }, { "epoch": 1.4771504203391803, "grad_norm": 12.5625, "learning_rate": 5.076166742153206e-06, "loss": 1.4242, "step": 50780 }, { "epoch": 1.477732204671728, "grad_norm": 12.4375, "learning_rate": 5.074227460480591e-06, "loss": 1.5828, "step": 50800 }, { "epoch": 1.4783139890042762, "grad_norm": 12.125, "learning_rate": 5.072288178807976e-06, "loss": 1.4836, "step": 50820 }, { "epoch": 1.478895773336824, "grad_norm": 14.0, "learning_rate": 5.070348897135361e-06, "loss": 1.5094, "step": 50840 }, { "epoch": 1.479477557669372, "grad_norm": 13.5, "learning_rate": 5.068409615462746e-06, "loss": 1.5181, "step": 50860 }, { "epoch": 1.48005934200192, "grad_norm": 12.625, "learning_rate": 5.066470333790131e-06, "loss": 1.4571, "step": 50880 }, { "epoch": 1.4806411263344679, "grad_norm": 13.625, "learning_rate": 5.0645310521175165e-06, "loss": 1.4324, "step": 50900 }, { "epoch": 1.4812229106670158, "grad_norm": 11.9375, "learning_rate": 5.0625917704449015e-06, "loss": 1.576, "step": 50920 }, { "epoch": 1.4818046949995636, "grad_norm": 15.125, "learning_rate": 5.060652488772287e-06, "loss": 1.5361, "step": 50940 }, { "epoch": 1.4823864793321115, "grad_norm": 11.8125, "learning_rate": 5.058713207099672e-06, "loss": 1.5862, "step": 50960 }, { "epoch": 1.4829682636646595, "grad_norm": 13.0625, "learning_rate": 5.056773925427057e-06, "loss": 1.5856, "step": 50980 }, { "epoch": 1.4835500479972075, "grad_norm": 15.5, "learning_rate": 5.054834643754442e-06, "loss": 1.5927, "step": 51000 }, { "epoch": 1.4841318323297554, "grad_norm": 12.6875, "learning_rate": 5.052895362081827e-06, "loss": 1.5097, "step": 51020 }, { "epoch": 1.4847136166623032, "grad_norm": 14.375, "learning_rate": 5.050956080409211e-06, "loss": 1.4944, "step": 51040 }, { "epoch": 1.4852954009948511, "grad_norm": 12.5625, "learning_rate": 5.049016798736596e-06, "loss": 1.572, "step": 51060 }, { "epoch": 1.485877185327399, "grad_norm": 16.25, "learning_rate": 5.0470775170639815e-06, "loss": 1.5845, "step": 51080 }, { "epoch": 1.486458969659947, "grad_norm": 13.625, "learning_rate": 5.045138235391367e-06, "loss": 1.5383, "step": 51100 }, { "epoch": 1.487040753992495, "grad_norm": 16.0, "learning_rate": 5.043198953718752e-06, "loss": 1.5037, "step": 51120 }, { "epoch": 1.487622538325043, "grad_norm": 14.3125, "learning_rate": 5.041259672046137e-06, "loss": 1.4387, "step": 51140 }, { "epoch": 1.488204322657591, "grad_norm": 13.4375, "learning_rate": 5.039320390373522e-06, "loss": 1.4998, "step": 51160 }, { "epoch": 1.4887861069901387, "grad_norm": 20.75, "learning_rate": 5.037381108700907e-06, "loss": 1.5381, "step": 51180 }, { "epoch": 1.4893678913226867, "grad_norm": 10.8125, "learning_rate": 5.035441827028292e-06, "loss": 1.5232, "step": 51200 }, { "epoch": 1.4899496756552346, "grad_norm": 13.875, "learning_rate": 5.033502545355677e-06, "loss": 1.5357, "step": 51220 }, { "epoch": 1.4905314599877826, "grad_norm": 13.375, "learning_rate": 5.031563263683062e-06, "loss": 1.4933, "step": 51240 }, { "epoch": 1.4911132443203305, "grad_norm": 13.375, "learning_rate": 5.029623982010447e-06, "loss": 1.4801, "step": 51260 }, { "epoch": 1.4916950286528783, "grad_norm": 15.25, "learning_rate": 5.0276847003378325e-06, "loss": 1.5051, "step": 51280 }, { "epoch": 1.4922768129854262, "grad_norm": 17.375, "learning_rate": 5.0257454186652176e-06, "loss": 1.4838, "step": 51300 }, { "epoch": 1.4928585973179742, "grad_norm": 12.625, "learning_rate": 5.023806136992603e-06, "loss": 1.5299, "step": 51320 }, { "epoch": 1.4934403816505222, "grad_norm": 11.75, "learning_rate": 5.021866855319988e-06, "loss": 1.4594, "step": 51340 }, { "epoch": 1.4940221659830701, "grad_norm": 14.3125, "learning_rate": 5.019927573647373e-06, "loss": 1.5199, "step": 51360 }, { "epoch": 1.4946039503156179, "grad_norm": 13.0, "learning_rate": 5.017988291974758e-06, "loss": 1.5827, "step": 51380 }, { "epoch": 1.4951857346481658, "grad_norm": 11.3125, "learning_rate": 5.016049010302143e-06, "loss": 1.5119, "step": 51400 }, { "epoch": 1.4957675189807138, "grad_norm": 14.4375, "learning_rate": 5.014109728629528e-06, "loss": 1.5574, "step": 51420 }, { "epoch": 1.4963493033132618, "grad_norm": 11.1875, "learning_rate": 5.012170446956913e-06, "loss": 1.4693, "step": 51440 }, { "epoch": 1.4969310876458097, "grad_norm": 13.8125, "learning_rate": 5.010231165284298e-06, "loss": 1.6051, "step": 51460 }, { "epoch": 1.4975128719783577, "grad_norm": 13.3125, "learning_rate": 5.0082918836116834e-06, "loss": 1.5523, "step": 51480 }, { "epoch": 1.4980946563109057, "grad_norm": 13.125, "learning_rate": 5.0063526019390685e-06, "loss": 1.5153, "step": 51500 }, { "epoch": 1.4986764406434534, "grad_norm": 14.0625, "learning_rate": 5.004413320266454e-06, "loss": 1.492, "step": 51520 }, { "epoch": 1.4992582249760014, "grad_norm": 11.5, "learning_rate": 5.002474038593839e-06, "loss": 1.5005, "step": 51540 }, { "epoch": 1.4998400093085493, "grad_norm": 13.9375, "learning_rate": 5.000534756921224e-06, "loss": 1.6143, "step": 51560 }, { "epoch": 1.5004217936410973, "grad_norm": 15.9375, "learning_rate": 4.998595475248609e-06, "loss": 1.5416, "step": 51580 }, { "epoch": 1.5010035779736453, "grad_norm": 14.75, "learning_rate": 4.996656193575994e-06, "loss": 1.5004, "step": 51600 }, { "epoch": 1.501585362306193, "grad_norm": 14.875, "learning_rate": 4.994716911903379e-06, "loss": 1.4522, "step": 51620 }, { "epoch": 1.5021671466387412, "grad_norm": 13.0, "learning_rate": 4.992777630230764e-06, "loss": 1.5349, "step": 51640 }, { "epoch": 1.502748930971289, "grad_norm": 14.3125, "learning_rate": 4.990838348558149e-06, "loss": 1.4802, "step": 51660 }, { "epoch": 1.5033307153038369, "grad_norm": 11.0, "learning_rate": 4.988899066885534e-06, "loss": 1.6301, "step": 51680 }, { "epoch": 1.5039124996363848, "grad_norm": 11.25, "learning_rate": 4.9869597852129195e-06, "loss": 1.49, "step": 51700 }, { "epoch": 1.5044942839689326, "grad_norm": 14.0625, "learning_rate": 4.985020503540305e-06, "loss": 1.5538, "step": 51720 }, { "epoch": 1.5050760683014808, "grad_norm": 12.3125, "learning_rate": 4.98308122186769e-06, "loss": 1.5761, "step": 51740 }, { "epoch": 1.5056578526340285, "grad_norm": 14.125, "learning_rate": 4.981141940195075e-06, "loss": 1.4782, "step": 51760 }, { "epoch": 1.5062396369665765, "grad_norm": 15.1875, "learning_rate": 4.97920265852246e-06, "loss": 1.4338, "step": 51780 }, { "epoch": 1.5068214212991244, "grad_norm": 14.6875, "learning_rate": 4.977263376849845e-06, "loss": 1.5249, "step": 51800 }, { "epoch": 1.5074032056316722, "grad_norm": 13.3125, "learning_rate": 4.97532409517723e-06, "loss": 1.5055, "step": 51820 }, { "epoch": 1.5079849899642204, "grad_norm": 13.5625, "learning_rate": 4.973384813504615e-06, "loss": 1.5288, "step": 51840 }, { "epoch": 1.508566774296768, "grad_norm": 11.9375, "learning_rate": 4.9714455318319994e-06, "loss": 1.5375, "step": 51860 }, { "epoch": 1.509148558629316, "grad_norm": 14.75, "learning_rate": 4.9695062501593845e-06, "loss": 1.4812, "step": 51880 }, { "epoch": 1.509730342961864, "grad_norm": 12.8125, "learning_rate": 4.96756696848677e-06, "loss": 1.5579, "step": 51900 }, { "epoch": 1.510312127294412, "grad_norm": 13.125, "learning_rate": 4.965627686814155e-06, "loss": 1.6508, "step": 51920 }, { "epoch": 1.51089391162696, "grad_norm": 14.375, "learning_rate": 4.96368840514154e-06, "loss": 1.469, "step": 51940 }, { "epoch": 1.5114756959595077, "grad_norm": 15.4375, "learning_rate": 4.961749123468925e-06, "loss": 1.6221, "step": 51960 }, { "epoch": 1.5120574802920559, "grad_norm": 14.0625, "learning_rate": 4.95980984179631e-06, "loss": 1.5103, "step": 51980 }, { "epoch": 1.5126392646246036, "grad_norm": 12.5625, "learning_rate": 4.957870560123695e-06, "loss": 1.5693, "step": 52000 }, { "epoch": 1.5132210489571516, "grad_norm": 14.6875, "learning_rate": 4.95593127845108e-06, "loss": 1.4608, "step": 52020 }, { "epoch": 1.5138028332896996, "grad_norm": 13.375, "learning_rate": 4.953991996778465e-06, "loss": 1.533, "step": 52040 }, { "epoch": 1.5143846176222473, "grad_norm": 13.75, "learning_rate": 4.95205271510585e-06, "loss": 1.5669, "step": 52060 }, { "epoch": 1.5149664019547955, "grad_norm": 15.8125, "learning_rate": 4.9501134334332355e-06, "loss": 1.519, "step": 52080 }, { "epoch": 1.5155481862873432, "grad_norm": 12.375, "learning_rate": 4.948174151760621e-06, "loss": 1.5564, "step": 52100 }, { "epoch": 1.5161299706198912, "grad_norm": 12.6875, "learning_rate": 4.946234870088006e-06, "loss": 1.5168, "step": 52120 }, { "epoch": 1.5167117549524392, "grad_norm": 12.25, "learning_rate": 4.944295588415391e-06, "loss": 1.593, "step": 52140 }, { "epoch": 1.517293539284987, "grad_norm": 12.6875, "learning_rate": 4.942356306742776e-06, "loss": 1.6088, "step": 52160 }, { "epoch": 1.517875323617535, "grad_norm": 12.125, "learning_rate": 4.940417025070161e-06, "loss": 1.4852, "step": 52180 }, { "epoch": 1.5184571079500828, "grad_norm": 11.25, "learning_rate": 4.938477743397546e-06, "loss": 1.5165, "step": 52200 }, { "epoch": 1.5190388922826308, "grad_norm": 13.375, "learning_rate": 4.936538461724931e-06, "loss": 1.5314, "step": 52220 }, { "epoch": 1.5196206766151787, "grad_norm": 14.125, "learning_rate": 4.934599180052316e-06, "loss": 1.5572, "step": 52240 }, { "epoch": 1.5202024609477267, "grad_norm": 12.6875, "learning_rate": 4.932659898379701e-06, "loss": 1.5518, "step": 52260 }, { "epoch": 1.5207842452802747, "grad_norm": 11.1875, "learning_rate": 4.9307206167070865e-06, "loss": 1.5268, "step": 52280 }, { "epoch": 1.5213660296128224, "grad_norm": 16.625, "learning_rate": 4.928781335034472e-06, "loss": 1.5436, "step": 52300 }, { "epoch": 1.5219478139453706, "grad_norm": 13.9375, "learning_rate": 4.926842053361857e-06, "loss": 1.559, "step": 52320 }, { "epoch": 1.5225295982779183, "grad_norm": 11.625, "learning_rate": 4.924902771689242e-06, "loss": 1.5251, "step": 52340 }, { "epoch": 1.5231113826104663, "grad_norm": 12.4375, "learning_rate": 4.922963490016627e-06, "loss": 1.6352, "step": 52360 }, { "epoch": 1.5236931669430143, "grad_norm": 14.875, "learning_rate": 4.921024208344012e-06, "loss": 1.4764, "step": 52380 }, { "epoch": 1.524274951275562, "grad_norm": 13.1875, "learning_rate": 4.919084926671397e-06, "loss": 1.5555, "step": 52400 }, { "epoch": 1.5248567356081102, "grad_norm": 12.25, "learning_rate": 4.917145644998782e-06, "loss": 1.5648, "step": 52420 }, { "epoch": 1.525438519940658, "grad_norm": 13.0625, "learning_rate": 4.915206363326167e-06, "loss": 1.4756, "step": 52440 }, { "epoch": 1.526020304273206, "grad_norm": 11.0, "learning_rate": 4.913267081653552e-06, "loss": 1.5144, "step": 52460 }, { "epoch": 1.5266020886057539, "grad_norm": 13.375, "learning_rate": 4.9113277999809375e-06, "loss": 1.47, "step": 52480 }, { "epoch": 1.5271838729383018, "grad_norm": 12.125, "learning_rate": 4.9093885183083226e-06, "loss": 1.4436, "step": 52500 }, { "epoch": 1.5277656572708498, "grad_norm": 12.0625, "learning_rate": 4.907449236635708e-06, "loss": 1.5564, "step": 52520 }, { "epoch": 1.5283474416033975, "grad_norm": 13.5625, "learning_rate": 4.905509954963093e-06, "loss": 1.4803, "step": 52540 }, { "epoch": 1.5289292259359455, "grad_norm": 11.9375, "learning_rate": 4.903570673290478e-06, "loss": 1.4645, "step": 52560 }, { "epoch": 1.5295110102684935, "grad_norm": 13.25, "learning_rate": 4.901631391617863e-06, "loss": 1.5036, "step": 52580 }, { "epoch": 1.5300927946010414, "grad_norm": 13.5625, "learning_rate": 4.899692109945248e-06, "loss": 1.5826, "step": 52600 }, { "epoch": 1.5306745789335894, "grad_norm": 13.4375, "learning_rate": 4.897752828272633e-06, "loss": 1.5535, "step": 52620 }, { "epoch": 1.5312563632661371, "grad_norm": 13.0, "learning_rate": 4.895813546600018e-06, "loss": 1.5745, "step": 52640 }, { "epoch": 1.5318381475986853, "grad_norm": 12.1875, "learning_rate": 4.893874264927403e-06, "loss": 1.5237, "step": 52660 }, { "epoch": 1.532419931931233, "grad_norm": 12.375, "learning_rate": 4.891934983254788e-06, "loss": 1.505, "step": 52680 }, { "epoch": 1.533001716263781, "grad_norm": 11.9375, "learning_rate": 4.889995701582173e-06, "loss": 1.486, "step": 52700 }, { "epoch": 1.533583500596329, "grad_norm": 13.25, "learning_rate": 4.888056419909558e-06, "loss": 1.5246, "step": 52720 }, { "epoch": 1.5341652849288767, "grad_norm": 13.5625, "learning_rate": 4.886117138236943e-06, "loss": 1.5384, "step": 52740 }, { "epoch": 1.534747069261425, "grad_norm": 14.5, "learning_rate": 4.884177856564328e-06, "loss": 1.5441, "step": 52760 }, { "epoch": 1.5353288535939726, "grad_norm": 13.25, "learning_rate": 4.882238574891713e-06, "loss": 1.5209, "step": 52780 }, { "epoch": 1.5359106379265206, "grad_norm": 11.4375, "learning_rate": 4.880299293219098e-06, "loss": 1.5664, "step": 52800 }, { "epoch": 1.5364924222590686, "grad_norm": 12.6875, "learning_rate": 4.878360011546483e-06, "loss": 1.459, "step": 52820 }, { "epoch": 1.5370742065916165, "grad_norm": 15.6875, "learning_rate": 4.876420729873868e-06, "loss": 1.5468, "step": 52840 }, { "epoch": 1.5376559909241645, "grad_norm": 15.1875, "learning_rate": 4.8744814482012535e-06, "loss": 1.5809, "step": 52860 }, { "epoch": 1.5382377752567122, "grad_norm": 11.625, "learning_rate": 4.8725421665286386e-06, "loss": 1.5441, "step": 52880 }, { "epoch": 1.5388195595892604, "grad_norm": 17.25, "learning_rate": 4.870602884856024e-06, "loss": 1.588, "step": 52900 }, { "epoch": 1.5394013439218082, "grad_norm": 13.3125, "learning_rate": 4.868663603183409e-06, "loss": 1.4962, "step": 52920 }, { "epoch": 1.5399831282543561, "grad_norm": 14.0625, "learning_rate": 4.866724321510794e-06, "loss": 1.494, "step": 52940 }, { "epoch": 1.540564912586904, "grad_norm": 14.1875, "learning_rate": 4.864785039838179e-06, "loss": 1.5254, "step": 52960 }, { "epoch": 1.5411466969194518, "grad_norm": 15.9375, "learning_rate": 4.862845758165564e-06, "loss": 1.5036, "step": 52980 }, { "epoch": 1.541728481252, "grad_norm": 13.125, "learning_rate": 4.860906476492949e-06, "loss": 1.4845, "step": 53000 }, { "epoch": 1.5423102655845478, "grad_norm": 12.625, "learning_rate": 4.858967194820334e-06, "loss": 1.5102, "step": 53020 }, { "epoch": 1.5428920499170957, "grad_norm": 10.5625, "learning_rate": 4.857027913147719e-06, "loss": 1.5368, "step": 53040 }, { "epoch": 1.5434738342496437, "grad_norm": 13.875, "learning_rate": 4.8550886314751044e-06, "loss": 1.5532, "step": 53060 }, { "epoch": 1.5440556185821914, "grad_norm": 13.75, "learning_rate": 4.8531493498024895e-06, "loss": 1.5391, "step": 53080 }, { "epoch": 1.5446374029147396, "grad_norm": 14.0, "learning_rate": 4.851210068129875e-06, "loss": 1.5926, "step": 53100 }, { "epoch": 1.5452191872472874, "grad_norm": 12.5625, "learning_rate": 4.84927078645726e-06, "loss": 1.528, "step": 53120 }, { "epoch": 1.5458009715798353, "grad_norm": 15.0, "learning_rate": 4.847331504784645e-06, "loss": 1.481, "step": 53140 }, { "epoch": 1.5463827559123833, "grad_norm": 14.5625, "learning_rate": 4.84539222311203e-06, "loss": 1.5169, "step": 53160 }, { "epoch": 1.5469645402449312, "grad_norm": 9.125, "learning_rate": 4.843452941439415e-06, "loss": 1.537, "step": 53180 }, { "epoch": 1.5475463245774792, "grad_norm": 12.0625, "learning_rate": 4.8415136597668e-06, "loss": 1.4713, "step": 53200 }, { "epoch": 1.548128108910027, "grad_norm": 15.375, "learning_rate": 4.839574378094185e-06, "loss": 1.5724, "step": 53220 }, { "epoch": 1.5487098932425751, "grad_norm": 11.25, "learning_rate": 4.83763509642157e-06, "loss": 1.5441, "step": 53240 }, { "epoch": 1.5492916775751229, "grad_norm": 16.5, "learning_rate": 4.835695814748955e-06, "loss": 1.5224, "step": 53260 }, { "epoch": 1.5498734619076708, "grad_norm": 11.9375, "learning_rate": 4.8337565330763405e-06, "loss": 1.4652, "step": 53280 }, { "epoch": 1.5504552462402188, "grad_norm": 13.125, "learning_rate": 4.831817251403726e-06, "loss": 1.5336, "step": 53300 }, { "epoch": 1.5510370305727665, "grad_norm": 10.375, "learning_rate": 4.829877969731111e-06, "loss": 1.5128, "step": 53320 }, { "epoch": 1.5516188149053147, "grad_norm": 12.8125, "learning_rate": 4.827938688058496e-06, "loss": 1.4405, "step": 53340 }, { "epoch": 1.5522005992378625, "grad_norm": 11.5, "learning_rate": 4.825999406385881e-06, "loss": 1.581, "step": 53360 }, { "epoch": 1.5527823835704104, "grad_norm": 12.625, "learning_rate": 4.824060124713266e-06, "loss": 1.5412, "step": 53380 }, { "epoch": 1.5533641679029584, "grad_norm": 13.6875, "learning_rate": 4.822120843040651e-06, "loss": 1.4463, "step": 53400 }, { "epoch": 1.5539459522355061, "grad_norm": 13.3125, "learning_rate": 4.820181561368036e-06, "loss": 1.5782, "step": 53420 }, { "epoch": 1.5545277365680543, "grad_norm": 16.75, "learning_rate": 4.818242279695421e-06, "loss": 1.5875, "step": 53440 }, { "epoch": 1.555109520900602, "grad_norm": 12.0625, "learning_rate": 4.816302998022806e-06, "loss": 1.5127, "step": 53460 }, { "epoch": 1.55569130523315, "grad_norm": 10.5625, "learning_rate": 4.814363716350191e-06, "loss": 1.5307, "step": 53480 }, { "epoch": 1.556273089565698, "grad_norm": 14.625, "learning_rate": 4.812424434677576e-06, "loss": 1.4338, "step": 53500 }, { "epoch": 1.556854873898246, "grad_norm": 14.5625, "learning_rate": 4.810485153004961e-06, "loss": 1.5595, "step": 53520 }, { "epoch": 1.557436658230794, "grad_norm": 12.75, "learning_rate": 4.808545871332346e-06, "loss": 1.6282, "step": 53540 }, { "epoch": 1.5580184425633417, "grad_norm": 13.8125, "learning_rate": 4.806606589659731e-06, "loss": 1.4419, "step": 53560 }, { "epoch": 1.5586002268958898, "grad_norm": 13.3125, "learning_rate": 4.804667307987116e-06, "loss": 1.4877, "step": 53580 }, { "epoch": 1.5591820112284376, "grad_norm": 10.75, "learning_rate": 4.802728026314501e-06, "loss": 1.4213, "step": 53600 }, { "epoch": 1.5597637955609855, "grad_norm": 13.5625, "learning_rate": 4.800788744641886e-06, "loss": 1.5356, "step": 53620 }, { "epoch": 1.5603455798935335, "grad_norm": 13.125, "learning_rate": 4.798849462969271e-06, "loss": 1.4539, "step": 53640 }, { "epoch": 1.5609273642260812, "grad_norm": 12.6875, "learning_rate": 4.7969101812966565e-06, "loss": 1.4881, "step": 53660 }, { "epoch": 1.5615091485586294, "grad_norm": 13.375, "learning_rate": 4.794970899624042e-06, "loss": 1.4572, "step": 53680 }, { "epoch": 1.5620909328911772, "grad_norm": 14.5625, "learning_rate": 4.793031617951427e-06, "loss": 1.4704, "step": 53700 }, { "epoch": 1.5626727172237251, "grad_norm": 12.5625, "learning_rate": 4.791092336278812e-06, "loss": 1.5089, "step": 53720 }, { "epoch": 1.563254501556273, "grad_norm": 14.5625, "learning_rate": 4.789153054606197e-06, "loss": 1.4816, "step": 53740 }, { "epoch": 1.563836285888821, "grad_norm": 12.4375, "learning_rate": 4.787213772933582e-06, "loss": 1.5167, "step": 53760 }, { "epoch": 1.564418070221369, "grad_norm": 13.1875, "learning_rate": 4.785274491260966e-06, "loss": 1.5706, "step": 53780 }, { "epoch": 1.5649998545539168, "grad_norm": 13.4375, "learning_rate": 4.783335209588351e-06, "loss": 1.4895, "step": 53800 }, { "epoch": 1.5655816388864647, "grad_norm": 14.0, "learning_rate": 4.7813959279157364e-06, "loss": 1.5177, "step": 53820 }, { "epoch": 1.5661634232190127, "grad_norm": 13.125, "learning_rate": 4.7794566462431215e-06, "loss": 1.5508, "step": 53840 }, { "epoch": 1.5667452075515607, "grad_norm": 13.875, "learning_rate": 4.777517364570507e-06, "loss": 1.6271, "step": 53860 }, { "epoch": 1.5673269918841086, "grad_norm": 12.375, "learning_rate": 4.775578082897892e-06, "loss": 1.5082, "step": 53880 }, { "epoch": 1.5679087762166564, "grad_norm": 12.8125, "learning_rate": 4.773638801225277e-06, "loss": 1.5444, "step": 53900 }, { "epoch": 1.5684905605492045, "grad_norm": 12.4375, "learning_rate": 4.771699519552662e-06, "loss": 1.5745, "step": 53920 }, { "epoch": 1.5690723448817523, "grad_norm": 13.25, "learning_rate": 4.769760237880047e-06, "loss": 1.5928, "step": 53940 }, { "epoch": 1.5696541292143003, "grad_norm": 12.875, "learning_rate": 4.767820956207432e-06, "loss": 1.5923, "step": 53960 }, { "epoch": 1.5702359135468482, "grad_norm": 11.8125, "learning_rate": 4.765881674534817e-06, "loss": 1.5671, "step": 53980 }, { "epoch": 1.570817697879396, "grad_norm": 15.1875, "learning_rate": 4.763942392862202e-06, "loss": 1.5224, "step": 54000 }, { "epoch": 1.5713994822119441, "grad_norm": 14.25, "learning_rate": 4.762003111189587e-06, "loss": 1.4886, "step": 54020 }, { "epoch": 1.5719812665444919, "grad_norm": 11.8125, "learning_rate": 4.7600638295169725e-06, "loss": 1.4678, "step": 54040 }, { "epoch": 1.5725630508770398, "grad_norm": 13.75, "learning_rate": 4.758124547844358e-06, "loss": 1.4967, "step": 54060 }, { "epoch": 1.5731448352095878, "grad_norm": 13.375, "learning_rate": 4.756185266171743e-06, "loss": 1.5169, "step": 54080 }, { "epoch": 1.5737266195421358, "grad_norm": 12.6875, "learning_rate": 4.754245984499128e-06, "loss": 1.4806, "step": 54100 }, { "epoch": 1.5743084038746837, "grad_norm": 14.5, "learning_rate": 4.752306702826513e-06, "loss": 1.5104, "step": 54120 }, { "epoch": 1.5748901882072315, "grad_norm": 11.6875, "learning_rate": 4.750367421153898e-06, "loss": 1.5267, "step": 54140 }, { "epoch": 1.5754719725397797, "grad_norm": 10.8125, "learning_rate": 4.748428139481283e-06, "loss": 1.4444, "step": 54160 }, { "epoch": 1.5760537568723274, "grad_norm": 14.75, "learning_rate": 4.746488857808668e-06, "loss": 1.5271, "step": 54180 }, { "epoch": 1.5766355412048754, "grad_norm": 14.0, "learning_rate": 4.744549576136053e-06, "loss": 1.4962, "step": 54200 }, { "epoch": 1.5772173255374233, "grad_norm": 12.6875, "learning_rate": 4.742610294463438e-06, "loss": 1.5658, "step": 54220 }, { "epoch": 1.577799109869971, "grad_norm": 11.3125, "learning_rate": 4.7406710127908235e-06, "loss": 1.5937, "step": 54240 }, { "epoch": 1.5783808942025193, "grad_norm": 13.5625, "learning_rate": 4.738731731118209e-06, "loss": 1.52, "step": 54260 }, { "epoch": 1.578962678535067, "grad_norm": 15.1875, "learning_rate": 4.736792449445594e-06, "loss": 1.4848, "step": 54280 }, { "epoch": 1.579544462867615, "grad_norm": 9.9375, "learning_rate": 4.734853167772979e-06, "loss": 1.4543, "step": 54300 }, { "epoch": 1.580126247200163, "grad_norm": 13.625, "learning_rate": 4.732913886100364e-06, "loss": 1.4826, "step": 54320 }, { "epoch": 1.5807080315327107, "grad_norm": 12.1875, "learning_rate": 4.730974604427749e-06, "loss": 1.5062, "step": 54340 }, { "epoch": 1.5812898158652589, "grad_norm": 10.8125, "learning_rate": 4.729035322755134e-06, "loss": 1.4645, "step": 54360 }, { "epoch": 1.5818716001978066, "grad_norm": 13.3125, "learning_rate": 4.727096041082519e-06, "loss": 1.5349, "step": 54380 }, { "epoch": 1.5824533845303546, "grad_norm": 11.6875, "learning_rate": 4.725156759409904e-06, "loss": 1.5129, "step": 54400 }, { "epoch": 1.5830351688629025, "grad_norm": 15.125, "learning_rate": 4.723217477737289e-06, "loss": 1.5458, "step": 54420 }, { "epoch": 1.5836169531954505, "grad_norm": 11.375, "learning_rate": 4.7212781960646745e-06, "loss": 1.5131, "step": 54440 }, { "epoch": 1.5841987375279984, "grad_norm": 10.9375, "learning_rate": 4.7193389143920596e-06, "loss": 1.6478, "step": 54460 }, { "epoch": 1.5847805218605462, "grad_norm": 14.875, "learning_rate": 4.717399632719445e-06, "loss": 1.5146, "step": 54480 }, { "epoch": 1.5853623061930944, "grad_norm": 12.0, "learning_rate": 4.71546035104683e-06, "loss": 1.5281, "step": 54500 }, { "epoch": 1.5859440905256421, "grad_norm": 13.8125, "learning_rate": 4.713521069374215e-06, "loss": 1.575, "step": 54520 }, { "epoch": 1.58652587485819, "grad_norm": 16.875, "learning_rate": 4.7115817877016e-06, "loss": 1.5869, "step": 54540 }, { "epoch": 1.587107659190738, "grad_norm": 12.375, "learning_rate": 4.709642506028985e-06, "loss": 1.4953, "step": 54560 }, { "epoch": 1.5876894435232858, "grad_norm": 10.875, "learning_rate": 4.70770322435637e-06, "loss": 1.5689, "step": 54580 }, { "epoch": 1.588271227855834, "grad_norm": 17.25, "learning_rate": 4.705763942683754e-06, "loss": 1.5589, "step": 54600 }, { "epoch": 1.5888530121883817, "grad_norm": 13.0, "learning_rate": 4.7038246610111395e-06, "loss": 1.5302, "step": 54620 }, { "epoch": 1.5894347965209297, "grad_norm": 13.5625, "learning_rate": 4.701885379338525e-06, "loss": 1.3932, "step": 54640 }, { "epoch": 1.5900165808534776, "grad_norm": 11.875, "learning_rate": 4.69994609766591e-06, "loss": 1.4435, "step": 54660 }, { "epoch": 1.5905983651860254, "grad_norm": 13.125, "learning_rate": 4.698006815993295e-06, "loss": 1.5546, "step": 54680 }, { "epoch": 1.5911801495185736, "grad_norm": 11.625, "learning_rate": 4.69606753432068e-06, "loss": 1.5133, "step": 54700 }, { "epoch": 1.5917619338511213, "grad_norm": 14.1875, "learning_rate": 4.694128252648065e-06, "loss": 1.4977, "step": 54720 }, { "epoch": 1.5923437181836693, "grad_norm": 14.6875, "learning_rate": 4.69218897097545e-06, "loss": 1.6043, "step": 54740 }, { "epoch": 1.5929255025162172, "grad_norm": 15.0625, "learning_rate": 4.690249689302835e-06, "loss": 1.5224, "step": 54760 }, { "epoch": 1.5935072868487652, "grad_norm": 16.5, "learning_rate": 4.68831040763022e-06, "loss": 1.5228, "step": 54780 }, { "epoch": 1.5940890711813132, "grad_norm": 16.125, "learning_rate": 4.686371125957605e-06, "loss": 1.5182, "step": 54800 }, { "epoch": 1.594670855513861, "grad_norm": 16.375, "learning_rate": 4.6844318442849905e-06, "loss": 1.4713, "step": 54820 }, { "epoch": 1.595252639846409, "grad_norm": 13.5625, "learning_rate": 4.6824925626123756e-06, "loss": 1.589, "step": 54840 }, { "epoch": 1.5958344241789568, "grad_norm": 12.8125, "learning_rate": 4.680553280939761e-06, "loss": 1.5047, "step": 54860 }, { "epoch": 1.5964162085115048, "grad_norm": 15.1875, "learning_rate": 4.678613999267146e-06, "loss": 1.5411, "step": 54880 }, { "epoch": 1.5969979928440527, "grad_norm": 15.4375, "learning_rate": 4.676674717594531e-06, "loss": 1.4919, "step": 54900 }, { "epoch": 1.5975797771766005, "grad_norm": 16.0, "learning_rate": 4.674735435921916e-06, "loss": 1.6321, "step": 54920 }, { "epoch": 1.5981615615091487, "grad_norm": 14.25, "learning_rate": 4.672796154249301e-06, "loss": 1.4955, "step": 54940 }, { "epoch": 1.5987433458416964, "grad_norm": 11.4375, "learning_rate": 4.670856872576686e-06, "loss": 1.5507, "step": 54960 }, { "epoch": 1.5993251301742444, "grad_norm": 9.8125, "learning_rate": 4.668917590904071e-06, "loss": 1.514, "step": 54980 }, { "epoch": 1.5999069145067923, "grad_norm": 12.6875, "learning_rate": 4.666978309231456e-06, "loss": 1.4391, "step": 55000 }, { "epoch": 1.6004886988393403, "grad_norm": 13.0625, "learning_rate": 4.6650390275588414e-06, "loss": 1.5631, "step": 55020 }, { "epoch": 1.6010704831718883, "grad_norm": 11.625, "learning_rate": 4.6630997458862265e-06, "loss": 1.4999, "step": 55040 }, { "epoch": 1.601652267504436, "grad_norm": 11.125, "learning_rate": 4.661160464213612e-06, "loss": 1.5002, "step": 55060 }, { "epoch": 1.602234051836984, "grad_norm": 13.125, "learning_rate": 4.659221182540997e-06, "loss": 1.451, "step": 55080 }, { "epoch": 1.602815836169532, "grad_norm": 10.5625, "learning_rate": 4.657281900868382e-06, "loss": 1.5922, "step": 55100 }, { "epoch": 1.60339762050208, "grad_norm": 15.125, "learning_rate": 4.655342619195767e-06, "loss": 1.5512, "step": 55120 }, { "epoch": 1.6039794048346279, "grad_norm": 13.0, "learning_rate": 4.653403337523152e-06, "loss": 1.5206, "step": 55140 }, { "epoch": 1.6045611891671756, "grad_norm": 13.9375, "learning_rate": 4.651464055850537e-06, "loss": 1.5631, "step": 55160 }, { "epoch": 1.6051429734997238, "grad_norm": 13.375, "learning_rate": 4.649524774177922e-06, "loss": 1.5608, "step": 55180 }, { "epoch": 1.6057247578322715, "grad_norm": 13.6875, "learning_rate": 4.647585492505307e-06, "loss": 1.5139, "step": 55200 }, { "epoch": 1.6063065421648195, "grad_norm": 13.9375, "learning_rate": 4.645646210832692e-06, "loss": 1.5417, "step": 55220 }, { "epoch": 1.6068883264973675, "grad_norm": 15.0, "learning_rate": 4.6437069291600775e-06, "loss": 1.4508, "step": 55240 }, { "epoch": 1.6074701108299152, "grad_norm": 13.125, "learning_rate": 4.641767647487463e-06, "loss": 1.5182, "step": 55260 }, { "epoch": 1.6080518951624634, "grad_norm": 12.125, "learning_rate": 4.639828365814848e-06, "loss": 1.5126, "step": 55280 }, { "epoch": 1.6086336794950111, "grad_norm": 14.625, "learning_rate": 4.637889084142233e-06, "loss": 1.5812, "step": 55300 }, { "epoch": 1.609215463827559, "grad_norm": 14.75, "learning_rate": 4.635949802469618e-06, "loss": 1.5074, "step": 55320 }, { "epoch": 1.609797248160107, "grad_norm": 12.75, "learning_rate": 4.634010520797003e-06, "loss": 1.4601, "step": 55340 }, { "epoch": 1.610379032492655, "grad_norm": 12.4375, "learning_rate": 4.632071239124388e-06, "loss": 1.5758, "step": 55360 }, { "epoch": 1.610960816825203, "grad_norm": 10.25, "learning_rate": 4.630131957451773e-06, "loss": 1.5303, "step": 55380 }, { "epoch": 1.6115426011577507, "grad_norm": 14.3125, "learning_rate": 4.6281926757791575e-06, "loss": 1.506, "step": 55400 }, { "epoch": 1.612124385490299, "grad_norm": 14.75, "learning_rate": 4.6262533941065426e-06, "loss": 1.4768, "step": 55420 }, { "epoch": 1.6127061698228466, "grad_norm": 13.75, "learning_rate": 4.624314112433928e-06, "loss": 1.5796, "step": 55440 }, { "epoch": 1.6132879541553946, "grad_norm": 13.4375, "learning_rate": 4.622374830761313e-06, "loss": 1.5388, "step": 55460 }, { "epoch": 1.6138697384879426, "grad_norm": 14.5, "learning_rate": 4.620435549088698e-06, "loss": 1.5302, "step": 55480 }, { "epoch": 1.6144515228204903, "grad_norm": 11.875, "learning_rate": 4.618496267416083e-06, "loss": 1.5086, "step": 55500 }, { "epoch": 1.6150333071530385, "grad_norm": 12.125, "learning_rate": 4.616556985743468e-06, "loss": 1.5663, "step": 55520 }, { "epoch": 1.6156150914855862, "grad_norm": 14.5, "learning_rate": 4.614617704070853e-06, "loss": 1.4878, "step": 55540 }, { "epoch": 1.6161968758181342, "grad_norm": 15.9375, "learning_rate": 4.612678422398238e-06, "loss": 1.5529, "step": 55560 }, { "epoch": 1.6167786601506822, "grad_norm": 13.125, "learning_rate": 4.610739140725623e-06, "loss": 1.3758, "step": 55580 }, { "epoch": 1.61736044448323, "grad_norm": 14.8125, "learning_rate": 4.6087998590530084e-06, "loss": 1.4997, "step": 55600 }, { "epoch": 1.617942228815778, "grad_norm": 13.75, "learning_rate": 4.6068605773803935e-06, "loss": 1.4679, "step": 55620 }, { "epoch": 1.6185240131483258, "grad_norm": 18.875, "learning_rate": 4.604921295707779e-06, "loss": 1.5344, "step": 55640 }, { "epoch": 1.6191057974808738, "grad_norm": 13.375, "learning_rate": 4.602982014035164e-06, "loss": 1.4957, "step": 55660 }, { "epoch": 1.6196875818134218, "grad_norm": 14.3125, "learning_rate": 4.601042732362548e-06, "loss": 1.5895, "step": 55680 }, { "epoch": 1.6202693661459697, "grad_norm": 12.0625, "learning_rate": 4.599103450689933e-06, "loss": 1.5243, "step": 55700 }, { "epoch": 1.6208511504785177, "grad_norm": 13.8125, "learning_rate": 4.597164169017318e-06, "loss": 1.5176, "step": 55720 }, { "epoch": 1.6214329348110654, "grad_norm": 13.9375, "learning_rate": 4.595224887344703e-06, "loss": 1.5591, "step": 55740 }, { "epoch": 1.6220147191436136, "grad_norm": 12.75, "learning_rate": 4.593285605672088e-06, "loss": 1.5138, "step": 55760 }, { "epoch": 1.6225965034761614, "grad_norm": 11.6875, "learning_rate": 4.5913463239994735e-06, "loss": 1.5448, "step": 55780 }, { "epoch": 1.6231782878087093, "grad_norm": 13.8125, "learning_rate": 4.5894070423268586e-06, "loss": 1.5806, "step": 55800 }, { "epoch": 1.6237600721412573, "grad_norm": 13.75, "learning_rate": 4.587467760654244e-06, "loss": 1.5211, "step": 55820 }, { "epoch": 1.624341856473805, "grad_norm": 12.0, "learning_rate": 4.585528478981629e-06, "loss": 1.5163, "step": 55840 }, { "epoch": 1.6249236408063532, "grad_norm": 12.8125, "learning_rate": 4.583589197309014e-06, "loss": 1.5664, "step": 55860 }, { "epoch": 1.625505425138901, "grad_norm": 14.125, "learning_rate": 4.581649915636399e-06, "loss": 1.464, "step": 55880 }, { "epoch": 1.626087209471449, "grad_norm": 12.9375, "learning_rate": 4.579710633963784e-06, "loss": 1.5305, "step": 55900 }, { "epoch": 1.6266689938039969, "grad_norm": 13.4375, "learning_rate": 4.577771352291169e-06, "loss": 1.5297, "step": 55920 }, { "epoch": 1.6272507781365446, "grad_norm": 15.75, "learning_rate": 4.575832070618554e-06, "loss": 1.4527, "step": 55940 }, { "epoch": 1.6278325624690928, "grad_norm": 12.3125, "learning_rate": 4.573892788945939e-06, "loss": 1.5322, "step": 55960 }, { "epoch": 1.6284143468016405, "grad_norm": 12.4375, "learning_rate": 4.5719535072733244e-06, "loss": 1.5373, "step": 55980 }, { "epoch": 1.6289961311341885, "grad_norm": 14.25, "learning_rate": 4.5700142256007095e-06, "loss": 1.5091, "step": 56000 }, { "epoch": 1.6295779154667365, "grad_norm": 13.625, "learning_rate": 4.568074943928095e-06, "loss": 1.5388, "step": 56020 }, { "epoch": 1.6301596997992844, "grad_norm": 16.625, "learning_rate": 4.56613566225548e-06, "loss": 1.5081, "step": 56040 }, { "epoch": 1.6307414841318324, "grad_norm": 11.625, "learning_rate": 4.564196380582865e-06, "loss": 1.4889, "step": 56060 }, { "epoch": 1.6313232684643801, "grad_norm": 11.875, "learning_rate": 4.56225709891025e-06, "loss": 1.5055, "step": 56080 }, { "epoch": 1.6319050527969283, "grad_norm": 13.6875, "learning_rate": 4.560317817237635e-06, "loss": 1.5741, "step": 56100 }, { "epoch": 1.632486837129476, "grad_norm": 13.1875, "learning_rate": 4.55837853556502e-06, "loss": 1.4833, "step": 56120 }, { "epoch": 1.633068621462024, "grad_norm": 14.75, "learning_rate": 4.556439253892405e-06, "loss": 1.4771, "step": 56140 }, { "epoch": 1.633650405794572, "grad_norm": 13.0, "learning_rate": 4.55449997221979e-06, "loss": 1.4479, "step": 56160 }, { "epoch": 1.6342321901271197, "grad_norm": 12.6875, "learning_rate": 4.552560690547175e-06, "loss": 1.5089, "step": 56180 }, { "epoch": 1.634813974459668, "grad_norm": 13.5625, "learning_rate": 4.5506214088745605e-06, "loss": 1.5075, "step": 56200 }, { "epoch": 1.6353957587922157, "grad_norm": 13.625, "learning_rate": 4.548682127201946e-06, "loss": 1.4791, "step": 56220 }, { "epoch": 1.6359775431247636, "grad_norm": 15.375, "learning_rate": 4.546742845529331e-06, "loss": 1.5792, "step": 56240 }, { "epoch": 1.6365593274573116, "grad_norm": 14.375, "learning_rate": 4.544803563856716e-06, "loss": 1.5874, "step": 56260 }, { "epoch": 1.6371411117898595, "grad_norm": 11.6875, "learning_rate": 4.542864282184101e-06, "loss": 1.5322, "step": 56280 }, { "epoch": 1.6377228961224075, "grad_norm": 11.6875, "learning_rate": 4.540925000511486e-06, "loss": 1.5733, "step": 56300 }, { "epoch": 1.6383046804549553, "grad_norm": 12.8125, "learning_rate": 4.538985718838871e-06, "loss": 1.4755, "step": 56320 }, { "epoch": 1.6388864647875034, "grad_norm": 14.125, "learning_rate": 4.537046437166256e-06, "loss": 1.4399, "step": 56340 }, { "epoch": 1.6394682491200512, "grad_norm": 16.125, "learning_rate": 4.535107155493641e-06, "loss": 1.5363, "step": 56360 }, { "epoch": 1.6400500334525991, "grad_norm": 13.0, "learning_rate": 4.533167873821026e-06, "loss": 1.539, "step": 56380 }, { "epoch": 1.640631817785147, "grad_norm": 11.8125, "learning_rate": 4.5312285921484115e-06, "loss": 1.4282, "step": 56400 }, { "epoch": 1.6412136021176948, "grad_norm": 12.6875, "learning_rate": 4.529289310475797e-06, "loss": 1.5218, "step": 56420 }, { "epoch": 1.641795386450243, "grad_norm": 13.3125, "learning_rate": 4.527350028803182e-06, "loss": 1.5561, "step": 56440 }, { "epoch": 1.6423771707827908, "grad_norm": 10.875, "learning_rate": 4.525410747130567e-06, "loss": 1.4688, "step": 56460 }, { "epoch": 1.6429589551153387, "grad_norm": 16.375, "learning_rate": 4.523471465457952e-06, "loss": 1.5251, "step": 56480 }, { "epoch": 1.6435407394478867, "grad_norm": 12.3125, "learning_rate": 4.521532183785336e-06, "loss": 1.5564, "step": 56500 }, { "epoch": 1.6441225237804344, "grad_norm": 11.3125, "learning_rate": 4.519592902112721e-06, "loss": 1.5388, "step": 56520 }, { "epoch": 1.6447043081129826, "grad_norm": 9.9375, "learning_rate": 4.517653620440106e-06, "loss": 1.5414, "step": 56540 }, { "epoch": 1.6452860924455304, "grad_norm": 13.1875, "learning_rate": 4.515714338767491e-06, "loss": 1.5193, "step": 56560 }, { "epoch": 1.6458678767780783, "grad_norm": 14.375, "learning_rate": 4.5137750570948765e-06, "loss": 1.5994, "step": 56580 }, { "epoch": 1.6464496611106263, "grad_norm": 12.625, "learning_rate": 4.511835775422262e-06, "loss": 1.5887, "step": 56600 }, { "epoch": 1.6470314454431743, "grad_norm": 16.75, "learning_rate": 4.509896493749647e-06, "loss": 1.5113, "step": 56620 }, { "epoch": 1.6476132297757222, "grad_norm": 13.3125, "learning_rate": 4.507957212077032e-06, "loss": 1.6203, "step": 56640 }, { "epoch": 1.64819501410827, "grad_norm": 11.6875, "learning_rate": 4.506017930404417e-06, "loss": 1.4855, "step": 56660 }, { "epoch": 1.6487767984408181, "grad_norm": 14.375, "learning_rate": 4.504078648731802e-06, "loss": 1.4891, "step": 56680 }, { "epoch": 1.6493585827733659, "grad_norm": 12.5625, "learning_rate": 4.502139367059187e-06, "loss": 1.5076, "step": 56700 }, { "epoch": 1.6499403671059139, "grad_norm": 11.75, "learning_rate": 4.500200085386572e-06, "loss": 1.5899, "step": 56720 }, { "epoch": 1.6505221514384618, "grad_norm": 13.25, "learning_rate": 4.498260803713957e-06, "loss": 1.5738, "step": 56740 }, { "epoch": 1.6511039357710096, "grad_norm": 11.9375, "learning_rate": 4.496321522041342e-06, "loss": 1.5436, "step": 56760 }, { "epoch": 1.6516857201035577, "grad_norm": 14.0, "learning_rate": 4.4943822403687275e-06, "loss": 1.5207, "step": 56780 }, { "epoch": 1.6522675044361055, "grad_norm": 11.5, "learning_rate": 4.492442958696113e-06, "loss": 1.5748, "step": 56800 }, { "epoch": 1.6528492887686534, "grad_norm": 15.5625, "learning_rate": 4.490503677023498e-06, "loss": 1.5621, "step": 56820 }, { "epoch": 1.6534310731012014, "grad_norm": 12.375, "learning_rate": 4.488564395350883e-06, "loss": 1.5014, "step": 56840 }, { "epoch": 1.6540128574337492, "grad_norm": 12.75, "learning_rate": 4.486625113678268e-06, "loss": 1.5741, "step": 56860 }, { "epoch": 1.6545946417662973, "grad_norm": 14.1875, "learning_rate": 4.484685832005653e-06, "loss": 1.6574, "step": 56880 }, { "epoch": 1.655176426098845, "grad_norm": 12.875, "learning_rate": 4.482746550333038e-06, "loss": 1.4918, "step": 56900 }, { "epoch": 1.655758210431393, "grad_norm": 11.875, "learning_rate": 4.480807268660423e-06, "loss": 1.4969, "step": 56920 }, { "epoch": 1.656339994763941, "grad_norm": 14.5, "learning_rate": 4.478867986987808e-06, "loss": 1.5432, "step": 56940 }, { "epoch": 1.656921779096489, "grad_norm": 13.6875, "learning_rate": 4.476928705315193e-06, "loss": 1.5645, "step": 56960 }, { "epoch": 1.657503563429037, "grad_norm": 15.25, "learning_rate": 4.4749894236425785e-06, "loss": 1.4698, "step": 56980 }, { "epoch": 1.6580853477615847, "grad_norm": 13.5625, "learning_rate": 4.4730501419699636e-06, "loss": 1.5622, "step": 57000 }, { "epoch": 1.6586671320941329, "grad_norm": 12.5625, "learning_rate": 4.471110860297349e-06, "loss": 1.5406, "step": 57020 }, { "epoch": 1.6592489164266806, "grad_norm": 15.25, "learning_rate": 4.469171578624734e-06, "loss": 1.5354, "step": 57040 }, { "epoch": 1.6598307007592286, "grad_norm": 13.1875, "learning_rate": 4.467232296952119e-06, "loss": 1.4825, "step": 57060 }, { "epoch": 1.6604124850917765, "grad_norm": 13.6875, "learning_rate": 4.465293015279504e-06, "loss": 1.4245, "step": 57080 }, { "epoch": 1.6609942694243243, "grad_norm": 13.625, "learning_rate": 4.463353733606889e-06, "loss": 1.5259, "step": 57100 }, { "epoch": 1.6615760537568725, "grad_norm": 12.375, "learning_rate": 4.461414451934274e-06, "loss": 1.562, "step": 57120 }, { "epoch": 1.6621578380894202, "grad_norm": 9.0625, "learning_rate": 4.459475170261659e-06, "loss": 1.552, "step": 57140 }, { "epoch": 1.6627396224219682, "grad_norm": 15.4375, "learning_rate": 4.457535888589044e-06, "loss": 1.4998, "step": 57160 }, { "epoch": 1.6633214067545161, "grad_norm": 15.0625, "learning_rate": 4.4555966069164294e-06, "loss": 1.569, "step": 57180 }, { "epoch": 1.6639031910870639, "grad_norm": 12.75, "learning_rate": 4.4536573252438145e-06, "loss": 1.4864, "step": 57200 }, { "epoch": 1.664484975419612, "grad_norm": 13.1875, "learning_rate": 4.4517180435712e-06, "loss": 1.482, "step": 57220 }, { "epoch": 1.6650667597521598, "grad_norm": 13.0625, "learning_rate": 4.449778761898585e-06, "loss": 1.5329, "step": 57240 }, { "epoch": 1.6656485440847077, "grad_norm": 17.0, "learning_rate": 4.44783948022597e-06, "loss": 1.497, "step": 57260 }, { "epoch": 1.6662303284172557, "grad_norm": 16.5, "learning_rate": 4.445900198553355e-06, "loss": 1.5104, "step": 57280 }, { "epoch": 1.6668121127498037, "grad_norm": 13.0, "learning_rate": 4.44396091688074e-06, "loss": 1.5361, "step": 57300 }, { "epoch": 1.6673938970823516, "grad_norm": 14.5, "learning_rate": 4.442021635208124e-06, "loss": 1.4874, "step": 57320 }, { "epoch": 1.6679756814148994, "grad_norm": 13.625, "learning_rate": 4.440082353535509e-06, "loss": 1.4985, "step": 57340 }, { "epoch": 1.6685574657474476, "grad_norm": 14.1875, "learning_rate": 4.4381430718628945e-06, "loss": 1.5271, "step": 57360 }, { "epoch": 1.6691392500799953, "grad_norm": 11.8125, "learning_rate": 4.4362037901902796e-06, "loss": 1.5104, "step": 57380 }, { "epoch": 1.6697210344125433, "grad_norm": 12.125, "learning_rate": 4.434264508517665e-06, "loss": 1.4442, "step": 57400 }, { "epoch": 1.6703028187450912, "grad_norm": 14.6875, "learning_rate": 4.43232522684505e-06, "loss": 1.5881, "step": 57420 }, { "epoch": 1.670884603077639, "grad_norm": 12.0, "learning_rate": 4.430385945172435e-06, "loss": 1.4902, "step": 57440 }, { "epoch": 1.6714663874101872, "grad_norm": 12.4375, "learning_rate": 4.42844666349982e-06, "loss": 1.5443, "step": 57460 }, { "epoch": 1.672048171742735, "grad_norm": 10.9375, "learning_rate": 4.426507381827205e-06, "loss": 1.5009, "step": 57480 }, { "epoch": 1.6726299560752829, "grad_norm": 11.625, "learning_rate": 4.42456810015459e-06, "loss": 1.4822, "step": 57500 }, { "epoch": 1.6732117404078308, "grad_norm": 12.25, "learning_rate": 4.422628818481975e-06, "loss": 1.5753, "step": 57520 }, { "epoch": 1.6737935247403788, "grad_norm": 13.9375, "learning_rate": 4.42068953680936e-06, "loss": 1.4683, "step": 57540 }, { "epoch": 1.6743753090729268, "grad_norm": 11.4375, "learning_rate": 4.4187502551367454e-06, "loss": 1.5026, "step": 57560 }, { "epoch": 1.6749570934054745, "grad_norm": 16.25, "learning_rate": 4.4168109734641305e-06, "loss": 1.5711, "step": 57580 }, { "epoch": 1.6755388777380227, "grad_norm": 12.75, "learning_rate": 4.414871691791516e-06, "loss": 1.5626, "step": 57600 }, { "epoch": 1.6761206620705704, "grad_norm": 14.1875, "learning_rate": 4.412932410118901e-06, "loss": 1.5723, "step": 57620 }, { "epoch": 1.6767024464031184, "grad_norm": 13.0, "learning_rate": 4.410993128446286e-06, "loss": 1.5563, "step": 57640 }, { "epoch": 1.6772842307356663, "grad_norm": 15.875, "learning_rate": 4.409053846773671e-06, "loss": 1.5386, "step": 57660 }, { "epoch": 1.677866015068214, "grad_norm": 15.9375, "learning_rate": 4.407114565101056e-06, "loss": 1.494, "step": 57680 }, { "epoch": 1.6784477994007623, "grad_norm": 11.3125, "learning_rate": 4.405175283428441e-06, "loss": 1.5629, "step": 57700 }, { "epoch": 1.67902958373331, "grad_norm": 12.5625, "learning_rate": 4.403236001755825e-06, "loss": 1.4474, "step": 57720 }, { "epoch": 1.679611368065858, "grad_norm": 11.75, "learning_rate": 4.4012967200832105e-06, "loss": 1.6147, "step": 57740 }, { "epoch": 1.680193152398406, "grad_norm": 14.4375, "learning_rate": 4.3993574384105956e-06, "loss": 1.4958, "step": 57760 }, { "epoch": 1.6807749367309537, "grad_norm": 14.625, "learning_rate": 4.397418156737981e-06, "loss": 1.5962, "step": 57780 }, { "epoch": 1.6813567210635019, "grad_norm": 15.4375, "learning_rate": 4.395478875065366e-06, "loss": 1.5277, "step": 57800 }, { "epoch": 1.6819385053960496, "grad_norm": 13.5, "learning_rate": 4.393539593392751e-06, "loss": 1.5227, "step": 57820 }, { "epoch": 1.6825202897285976, "grad_norm": 14.4375, "learning_rate": 4.391600311720136e-06, "loss": 1.5584, "step": 57840 }, { "epoch": 1.6831020740611455, "grad_norm": 12.875, "learning_rate": 4.389661030047521e-06, "loss": 1.6491, "step": 57860 }, { "epoch": 1.6836838583936935, "grad_norm": 12.4375, "learning_rate": 4.387721748374906e-06, "loss": 1.5882, "step": 57880 }, { "epoch": 1.6842656427262415, "grad_norm": 13.3125, "learning_rate": 4.385782466702291e-06, "loss": 1.567, "step": 57900 }, { "epoch": 1.6848474270587892, "grad_norm": 13.0625, "learning_rate": 4.383843185029676e-06, "loss": 1.5049, "step": 57920 }, { "epoch": 1.6854292113913374, "grad_norm": 13.375, "learning_rate": 4.3819039033570614e-06, "loss": 1.5021, "step": 57940 }, { "epoch": 1.6860109957238851, "grad_norm": 12.9375, "learning_rate": 4.3799646216844465e-06, "loss": 1.6091, "step": 57960 }, { "epoch": 1.686592780056433, "grad_norm": 12.0625, "learning_rate": 4.378025340011832e-06, "loss": 1.5639, "step": 57980 }, { "epoch": 1.687174564388981, "grad_norm": 14.875, "learning_rate": 4.376086058339217e-06, "loss": 1.5785, "step": 58000 }, { "epoch": 1.6877563487215288, "grad_norm": 14.375, "learning_rate": 4.374146776666602e-06, "loss": 1.5313, "step": 58020 }, { "epoch": 1.688338133054077, "grad_norm": 13.5, "learning_rate": 4.372207494993987e-06, "loss": 1.5673, "step": 58040 }, { "epoch": 1.6889199173866247, "grad_norm": 11.9375, "learning_rate": 4.370268213321372e-06, "loss": 1.5271, "step": 58060 }, { "epoch": 1.6895017017191727, "grad_norm": 12.375, "learning_rate": 4.368328931648757e-06, "loss": 1.5138, "step": 58080 }, { "epoch": 1.6900834860517207, "grad_norm": 14.5625, "learning_rate": 4.366389649976142e-06, "loss": 1.5513, "step": 58100 }, { "epoch": 1.6906652703842684, "grad_norm": 12.625, "learning_rate": 4.364450368303527e-06, "loss": 1.4659, "step": 58120 }, { "epoch": 1.6912470547168166, "grad_norm": 17.125, "learning_rate": 4.362511086630912e-06, "loss": 1.5365, "step": 58140 }, { "epoch": 1.6918288390493643, "grad_norm": 14.5625, "learning_rate": 4.3605718049582975e-06, "loss": 1.5765, "step": 58160 }, { "epoch": 1.6924106233819123, "grad_norm": 14.8125, "learning_rate": 4.358632523285683e-06, "loss": 1.5635, "step": 58180 }, { "epoch": 1.6929924077144602, "grad_norm": 16.25, "learning_rate": 4.356693241613068e-06, "loss": 1.4967, "step": 58200 }, { "epoch": 1.6935741920470082, "grad_norm": 13.0, "learning_rate": 4.354753959940453e-06, "loss": 1.5344, "step": 58220 }, { "epoch": 1.6941559763795562, "grad_norm": 10.25, "learning_rate": 4.352814678267838e-06, "loss": 1.5465, "step": 58240 }, { "epoch": 1.694737760712104, "grad_norm": 12.0625, "learning_rate": 4.350875396595223e-06, "loss": 1.5161, "step": 58260 }, { "epoch": 1.695319545044652, "grad_norm": 12.5625, "learning_rate": 4.348936114922608e-06, "loss": 1.5355, "step": 58280 }, { "epoch": 1.6959013293771998, "grad_norm": 13.8125, "learning_rate": 4.346996833249993e-06, "loss": 1.4878, "step": 58300 }, { "epoch": 1.6964831137097478, "grad_norm": 11.9375, "learning_rate": 4.345057551577378e-06, "loss": 1.5294, "step": 58320 }, { "epoch": 1.6970648980422958, "grad_norm": 16.125, "learning_rate": 4.343118269904763e-06, "loss": 1.4918, "step": 58340 }, { "epoch": 1.6976466823748435, "grad_norm": 14.6875, "learning_rate": 4.3411789882321485e-06, "loss": 1.4957, "step": 58360 }, { "epoch": 1.6982284667073917, "grad_norm": 14.875, "learning_rate": 4.339239706559534e-06, "loss": 1.5475, "step": 58380 }, { "epoch": 1.6988102510399394, "grad_norm": 14.75, "learning_rate": 4.337300424886919e-06, "loss": 1.4932, "step": 58400 }, { "epoch": 1.6993920353724874, "grad_norm": 12.8125, "learning_rate": 4.335361143214303e-06, "loss": 1.5463, "step": 58420 }, { "epoch": 1.6999738197050354, "grad_norm": 11.6875, "learning_rate": 4.333421861541688e-06, "loss": 1.5259, "step": 58440 }, { "epoch": 1.700555604037583, "grad_norm": 10.8125, "learning_rate": 4.331482579869073e-06, "loss": 1.5044, "step": 58460 }, { "epoch": 1.7011373883701313, "grad_norm": 17.875, "learning_rate": 4.329543298196458e-06, "loss": 1.5168, "step": 58480 }, { "epoch": 1.701719172702679, "grad_norm": 11.5625, "learning_rate": 4.327604016523843e-06, "loss": 1.6243, "step": 58500 }, { "epoch": 1.702300957035227, "grad_norm": 13.375, "learning_rate": 4.3256647348512284e-06, "loss": 1.4814, "step": 58520 }, { "epoch": 1.702882741367775, "grad_norm": 13.125, "learning_rate": 4.3237254531786135e-06, "loss": 1.4382, "step": 58540 }, { "epoch": 1.703464525700323, "grad_norm": 18.25, "learning_rate": 4.321786171505999e-06, "loss": 1.5173, "step": 58560 }, { "epoch": 1.7040463100328709, "grad_norm": 16.0, "learning_rate": 4.319846889833384e-06, "loss": 1.54, "step": 58580 }, { "epoch": 1.7046280943654186, "grad_norm": 12.5625, "learning_rate": 4.317907608160769e-06, "loss": 1.4458, "step": 58600 }, { "epoch": 1.7052098786979668, "grad_norm": 11.8125, "learning_rate": 4.315968326488154e-06, "loss": 1.5298, "step": 58620 }, { "epoch": 1.7057916630305145, "grad_norm": 13.625, "learning_rate": 4.314029044815539e-06, "loss": 1.4869, "step": 58640 }, { "epoch": 1.7063734473630625, "grad_norm": 13.0625, "learning_rate": 4.312089763142924e-06, "loss": 1.5136, "step": 58660 }, { "epoch": 1.7069552316956105, "grad_norm": 17.25, "learning_rate": 4.310150481470309e-06, "loss": 1.5782, "step": 58680 }, { "epoch": 1.7075370160281582, "grad_norm": 12.25, "learning_rate": 4.308211199797694e-06, "loss": 1.5273, "step": 58700 }, { "epoch": 1.7081188003607064, "grad_norm": 12.4375, "learning_rate": 4.306271918125079e-06, "loss": 1.5192, "step": 58720 }, { "epoch": 1.7087005846932541, "grad_norm": 13.6875, "learning_rate": 4.3043326364524645e-06, "loss": 1.4448, "step": 58740 }, { "epoch": 1.709282369025802, "grad_norm": 14.75, "learning_rate": 4.30239335477985e-06, "loss": 1.6084, "step": 58760 }, { "epoch": 1.70986415335835, "grad_norm": 12.9375, "learning_rate": 4.300454073107235e-06, "loss": 1.497, "step": 58780 }, { "epoch": 1.710445937690898, "grad_norm": 13.0625, "learning_rate": 4.29851479143462e-06, "loss": 1.5252, "step": 58800 }, { "epoch": 1.711027722023446, "grad_norm": 12.8125, "learning_rate": 4.296575509762005e-06, "loss": 1.4394, "step": 58820 }, { "epoch": 1.7116095063559937, "grad_norm": 14.0625, "learning_rate": 4.29463622808939e-06, "loss": 1.5077, "step": 58840 }, { "epoch": 1.712191290688542, "grad_norm": 13.375, "learning_rate": 4.292696946416775e-06, "loss": 1.5944, "step": 58860 }, { "epoch": 1.7127730750210897, "grad_norm": 12.4375, "learning_rate": 4.29075766474416e-06, "loss": 1.5142, "step": 58880 }, { "epoch": 1.7133548593536376, "grad_norm": 14.0, "learning_rate": 4.288818383071545e-06, "loss": 1.5766, "step": 58900 }, { "epoch": 1.7139366436861856, "grad_norm": 13.625, "learning_rate": 4.28687910139893e-06, "loss": 1.4333, "step": 58920 }, { "epoch": 1.7145184280187333, "grad_norm": 14.125, "learning_rate": 4.2849398197263155e-06, "loss": 1.553, "step": 58940 }, { "epoch": 1.7151002123512815, "grad_norm": 11.625, "learning_rate": 4.2830005380537006e-06, "loss": 1.5117, "step": 58960 }, { "epoch": 1.7156819966838293, "grad_norm": 13.125, "learning_rate": 4.281061256381086e-06, "loss": 1.4692, "step": 58980 }, { "epoch": 1.7162637810163772, "grad_norm": 13.3125, "learning_rate": 4.279121974708471e-06, "loss": 1.5303, "step": 59000 }, { "epoch": 1.7168455653489252, "grad_norm": 13.5, "learning_rate": 4.277182693035856e-06, "loss": 1.503, "step": 59020 }, { "epoch": 1.717427349681473, "grad_norm": 11.5625, "learning_rate": 4.275243411363241e-06, "loss": 1.5862, "step": 59040 }, { "epoch": 1.718009134014021, "grad_norm": 12.4375, "learning_rate": 4.273304129690626e-06, "loss": 1.4501, "step": 59060 }, { "epoch": 1.7185909183465689, "grad_norm": 13.25, "learning_rate": 4.271364848018011e-06, "loss": 1.4513, "step": 59080 }, { "epoch": 1.7191727026791168, "grad_norm": 12.0, "learning_rate": 4.269425566345396e-06, "loss": 1.5399, "step": 59100 }, { "epoch": 1.7197544870116648, "grad_norm": 14.125, "learning_rate": 4.267486284672781e-06, "loss": 1.395, "step": 59120 }, { "epoch": 1.7203362713442127, "grad_norm": 15.3125, "learning_rate": 4.2655470030001664e-06, "loss": 1.5789, "step": 59140 }, { "epoch": 1.7209180556767607, "grad_norm": 11.9375, "learning_rate": 4.2636077213275515e-06, "loss": 1.4762, "step": 59160 }, { "epoch": 1.7214998400093084, "grad_norm": 10.375, "learning_rate": 4.261668439654937e-06, "loss": 1.5355, "step": 59180 }, { "epoch": 1.7220816243418566, "grad_norm": 14.0, "learning_rate": 4.259729157982322e-06, "loss": 1.4571, "step": 59200 }, { "epoch": 1.7226634086744044, "grad_norm": 12.625, "learning_rate": 4.257789876309707e-06, "loss": 1.4757, "step": 59220 }, { "epoch": 1.7232451930069523, "grad_norm": 11.875, "learning_rate": 4.255850594637091e-06, "loss": 1.5687, "step": 59240 }, { "epoch": 1.7238269773395003, "grad_norm": 13.625, "learning_rate": 4.253911312964476e-06, "loss": 1.5222, "step": 59260 }, { "epoch": 1.724408761672048, "grad_norm": 12.9375, "learning_rate": 4.251972031291861e-06, "loss": 1.5058, "step": 59280 }, { "epoch": 1.7249905460045962, "grad_norm": 13.125, "learning_rate": 4.250032749619246e-06, "loss": 1.4596, "step": 59300 }, { "epoch": 1.725572330337144, "grad_norm": 14.5625, "learning_rate": 4.2480934679466315e-06, "loss": 1.5361, "step": 59320 }, { "epoch": 1.726154114669692, "grad_norm": 15.0625, "learning_rate": 4.2461541862740166e-06, "loss": 1.4849, "step": 59340 }, { "epoch": 1.72673589900224, "grad_norm": 13.3125, "learning_rate": 4.244214904601402e-06, "loss": 1.5622, "step": 59360 }, { "epoch": 1.7273176833347876, "grad_norm": 13.25, "learning_rate": 4.242275622928787e-06, "loss": 1.5752, "step": 59380 }, { "epoch": 1.7278994676673358, "grad_norm": 14.0625, "learning_rate": 4.240336341256172e-06, "loss": 1.5047, "step": 59400 }, { "epoch": 1.7284812519998836, "grad_norm": 10.1875, "learning_rate": 4.238397059583557e-06, "loss": 1.4494, "step": 59420 }, { "epoch": 1.7290630363324315, "grad_norm": 11.625, "learning_rate": 4.236457777910942e-06, "loss": 1.3919, "step": 59440 }, { "epoch": 1.7296448206649795, "grad_norm": 11.125, "learning_rate": 4.234518496238327e-06, "loss": 1.4939, "step": 59460 }, { "epoch": 1.7302266049975275, "grad_norm": 15.125, "learning_rate": 4.232579214565712e-06, "loss": 1.5392, "step": 59480 }, { "epoch": 1.7308083893300754, "grad_norm": 17.25, "learning_rate": 4.230639932893097e-06, "loss": 1.5289, "step": 59500 }, { "epoch": 1.7313901736626232, "grad_norm": 13.0, "learning_rate": 4.2287006512204825e-06, "loss": 1.5658, "step": 59520 }, { "epoch": 1.7319719579951713, "grad_norm": 7.28125, "learning_rate": 4.2267613695478675e-06, "loss": 1.4307, "step": 59540 }, { "epoch": 1.732553742327719, "grad_norm": 13.3125, "learning_rate": 4.224822087875253e-06, "loss": 1.5388, "step": 59560 }, { "epoch": 1.733135526660267, "grad_norm": 15.5, "learning_rate": 4.222882806202638e-06, "loss": 1.5646, "step": 59580 }, { "epoch": 1.733717310992815, "grad_norm": 14.75, "learning_rate": 4.220943524530023e-06, "loss": 1.5158, "step": 59600 }, { "epoch": 1.7342990953253627, "grad_norm": 12.9375, "learning_rate": 4.219004242857408e-06, "loss": 1.5818, "step": 59620 }, { "epoch": 1.734880879657911, "grad_norm": 14.875, "learning_rate": 4.217064961184793e-06, "loss": 1.4712, "step": 59640 }, { "epoch": 1.7354626639904587, "grad_norm": 10.8125, "learning_rate": 4.215125679512178e-06, "loss": 1.5987, "step": 59660 }, { "epoch": 1.7360444483230066, "grad_norm": 13.8125, "learning_rate": 4.213186397839563e-06, "loss": 1.5238, "step": 59680 }, { "epoch": 1.7366262326555546, "grad_norm": 11.5625, "learning_rate": 4.211247116166948e-06, "loss": 1.4808, "step": 59700 }, { "epoch": 1.7372080169881026, "grad_norm": 13.0, "learning_rate": 4.2093078344943334e-06, "loss": 1.5405, "step": 59720 }, { "epoch": 1.7377898013206505, "grad_norm": 11.375, "learning_rate": 4.2073685528217185e-06, "loss": 1.49, "step": 59740 }, { "epoch": 1.7383715856531983, "grad_norm": 14.0, "learning_rate": 4.205429271149104e-06, "loss": 1.5166, "step": 59760 }, { "epoch": 1.7389533699857462, "grad_norm": 14.4375, "learning_rate": 4.203489989476489e-06, "loss": 1.5367, "step": 59780 }, { "epoch": 1.7395351543182942, "grad_norm": 11.5, "learning_rate": 4.201550707803874e-06, "loss": 1.5738, "step": 59800 }, { "epoch": 1.7401169386508422, "grad_norm": 15.75, "learning_rate": 4.199611426131259e-06, "loss": 1.4617, "step": 59820 }, { "epoch": 1.7406987229833901, "grad_norm": 15.875, "learning_rate": 4.197672144458643e-06, "loss": 1.6239, "step": 59840 }, { "epoch": 1.7412805073159379, "grad_norm": 12.5, "learning_rate": 4.195732862786028e-06, "loss": 1.5498, "step": 59860 }, { "epoch": 1.741862291648486, "grad_norm": 12.25, "learning_rate": 4.193793581113413e-06, "loss": 1.5468, "step": 59880 }, { "epoch": 1.7424440759810338, "grad_norm": 13.8125, "learning_rate": 4.1918542994407985e-06, "loss": 1.4817, "step": 59900 }, { "epoch": 1.7430258603135818, "grad_norm": 14.75, "learning_rate": 4.1899150177681836e-06, "loss": 1.549, "step": 59920 }, { "epoch": 1.7436076446461297, "grad_norm": 12.375, "learning_rate": 4.187975736095569e-06, "loss": 1.5096, "step": 59940 }, { "epoch": 1.7441894289786775, "grad_norm": 14.3125, "learning_rate": 4.186036454422954e-06, "loss": 1.5118, "step": 59960 }, { "epoch": 1.7447712133112256, "grad_norm": 14.8125, "learning_rate": 4.184097172750339e-06, "loss": 1.6053, "step": 59980 }, { "epoch": 1.7453529976437734, "grad_norm": 15.4375, "learning_rate": 4.182157891077724e-06, "loss": 1.4679, "step": 60000 }, { "epoch": 1.7459347819763213, "grad_norm": 10.4375, "learning_rate": 4.180218609405109e-06, "loss": 1.5594, "step": 60020 }, { "epoch": 1.7465165663088693, "grad_norm": 14.0, "learning_rate": 4.178279327732494e-06, "loss": 1.5039, "step": 60040 }, { "epoch": 1.7470983506414173, "grad_norm": 13.125, "learning_rate": 4.176340046059879e-06, "loss": 1.5033, "step": 60060 }, { "epoch": 1.7476801349739652, "grad_norm": 14.1875, "learning_rate": 4.174400764387264e-06, "loss": 1.5387, "step": 60080 }, { "epoch": 1.748261919306513, "grad_norm": 12.25, "learning_rate": 4.1724614827146494e-06, "loss": 1.5421, "step": 60100 }, { "epoch": 1.7488437036390612, "grad_norm": 13.625, "learning_rate": 4.1705222010420345e-06, "loss": 1.5557, "step": 60120 }, { "epoch": 1.749425487971609, "grad_norm": 13.0625, "learning_rate": 4.16858291936942e-06, "loss": 1.529, "step": 60140 }, { "epoch": 1.7500072723041569, "grad_norm": 13.5625, "learning_rate": 4.166643637696805e-06, "loss": 1.59, "step": 60160 }, { "epoch": 1.7505890566367048, "grad_norm": 14.75, "learning_rate": 4.16470435602419e-06, "loss": 1.5466, "step": 60180 }, { "epoch": 1.7511708409692526, "grad_norm": 14.375, "learning_rate": 4.162765074351575e-06, "loss": 1.5484, "step": 60200 }, { "epoch": 1.7517526253018008, "grad_norm": 13.1875, "learning_rate": 4.16082579267896e-06, "loss": 1.5077, "step": 60220 }, { "epoch": 1.7523344096343485, "grad_norm": 14.6875, "learning_rate": 4.158886511006345e-06, "loss": 1.5449, "step": 60240 }, { "epoch": 1.7529161939668965, "grad_norm": 12.9375, "learning_rate": 4.15694722933373e-06, "loss": 1.5163, "step": 60260 }, { "epoch": 1.7534979782994444, "grad_norm": 12.0625, "learning_rate": 4.155007947661115e-06, "loss": 1.4794, "step": 60280 }, { "epoch": 1.7540797626319922, "grad_norm": 13.625, "learning_rate": 4.1530686659885e-06, "loss": 1.4792, "step": 60300 }, { "epoch": 1.7546615469645404, "grad_norm": 14.5625, "learning_rate": 4.1511293843158855e-06, "loss": 1.554, "step": 60320 }, { "epoch": 1.755243331297088, "grad_norm": 13.625, "learning_rate": 4.14919010264327e-06, "loss": 1.5754, "step": 60340 }, { "epoch": 1.755825115629636, "grad_norm": 12.8125, "learning_rate": 4.147250820970655e-06, "loss": 1.5289, "step": 60360 }, { "epoch": 1.756406899962184, "grad_norm": 10.6875, "learning_rate": 4.14531153929804e-06, "loss": 1.3856, "step": 60380 }, { "epoch": 1.756988684294732, "grad_norm": 13.4375, "learning_rate": 4.143372257625425e-06, "loss": 1.5795, "step": 60400 }, { "epoch": 1.75757046862728, "grad_norm": 13.8125, "learning_rate": 4.14143297595281e-06, "loss": 1.5497, "step": 60420 }, { "epoch": 1.7581522529598277, "grad_norm": 11.0625, "learning_rate": 4.139493694280195e-06, "loss": 1.5081, "step": 60440 }, { "epoch": 1.7587340372923759, "grad_norm": 13.25, "learning_rate": 4.13755441260758e-06, "loss": 1.5566, "step": 60460 }, { "epoch": 1.7593158216249236, "grad_norm": 12.8125, "learning_rate": 4.1356151309349654e-06, "loss": 1.4932, "step": 60480 }, { "epoch": 1.7598976059574716, "grad_norm": 13.4375, "learning_rate": 4.1336758492623505e-06, "loss": 1.5285, "step": 60500 }, { "epoch": 1.7604793902900195, "grad_norm": 14.4375, "learning_rate": 4.131736567589736e-06, "loss": 1.5651, "step": 60520 }, { "epoch": 1.7610611746225673, "grad_norm": 13.8125, "learning_rate": 4.129797285917121e-06, "loss": 1.5124, "step": 60540 }, { "epoch": 1.7616429589551155, "grad_norm": 13.9375, "learning_rate": 4.127858004244506e-06, "loss": 1.4658, "step": 60560 }, { "epoch": 1.7622247432876632, "grad_norm": 17.5, "learning_rate": 4.125918722571891e-06, "loss": 1.4134, "step": 60580 }, { "epoch": 1.7628065276202112, "grad_norm": 12.375, "learning_rate": 4.123979440899276e-06, "loss": 1.5309, "step": 60600 }, { "epoch": 1.7633883119527591, "grad_norm": 13.4375, "learning_rate": 4.122040159226661e-06, "loss": 1.529, "step": 60620 }, { "epoch": 1.7639700962853069, "grad_norm": 12.6875, "learning_rate": 4.120100877554046e-06, "loss": 1.5403, "step": 60640 }, { "epoch": 1.764551880617855, "grad_norm": 13.1875, "learning_rate": 4.118161595881431e-06, "loss": 1.4753, "step": 60660 }, { "epoch": 1.7651336649504028, "grad_norm": 18.125, "learning_rate": 4.116222314208816e-06, "loss": 1.5695, "step": 60680 }, { "epoch": 1.7657154492829508, "grad_norm": 13.25, "learning_rate": 4.1142830325362015e-06, "loss": 1.5104, "step": 60700 }, { "epoch": 1.7662972336154987, "grad_norm": 12.6875, "learning_rate": 4.112343750863587e-06, "loss": 1.5038, "step": 60720 }, { "epoch": 1.7668790179480467, "grad_norm": 12.875, "learning_rate": 4.110404469190972e-06, "loss": 1.4563, "step": 60740 }, { "epoch": 1.7674608022805947, "grad_norm": 13.0, "learning_rate": 4.108465187518357e-06, "loss": 1.5103, "step": 60760 }, { "epoch": 1.7680425866131424, "grad_norm": 12.4375, "learning_rate": 4.106525905845742e-06, "loss": 1.4324, "step": 60780 }, { "epoch": 1.7686243709456906, "grad_norm": 14.0625, "learning_rate": 4.104586624173127e-06, "loss": 1.4796, "step": 60800 }, { "epoch": 1.7692061552782383, "grad_norm": 13.3125, "learning_rate": 4.102647342500512e-06, "loss": 1.5313, "step": 60820 }, { "epoch": 1.7697879396107863, "grad_norm": 13.0, "learning_rate": 4.100708060827897e-06, "loss": 1.5113, "step": 60840 }, { "epoch": 1.7703697239433343, "grad_norm": 13.5, "learning_rate": 4.098768779155282e-06, "loss": 1.5719, "step": 60860 }, { "epoch": 1.770951508275882, "grad_norm": 15.25, "learning_rate": 4.096829497482667e-06, "loss": 1.5592, "step": 60880 }, { "epoch": 1.7715332926084302, "grad_norm": 14.5, "learning_rate": 4.0948902158100525e-06, "loss": 1.5779, "step": 60900 }, { "epoch": 1.772115076940978, "grad_norm": 13.5625, "learning_rate": 4.092950934137438e-06, "loss": 1.5096, "step": 60920 }, { "epoch": 1.7726968612735259, "grad_norm": 14.0625, "learning_rate": 4.091011652464823e-06, "loss": 1.5509, "step": 60940 }, { "epoch": 1.7732786456060738, "grad_norm": 14.3125, "learning_rate": 4.089072370792208e-06, "loss": 1.5349, "step": 60960 }, { "epoch": 1.7738604299386218, "grad_norm": 14.4375, "learning_rate": 4.087133089119593e-06, "loss": 1.5832, "step": 60980 }, { "epoch": 1.7744422142711698, "grad_norm": 13.5, "learning_rate": 4.085193807446978e-06, "loss": 1.5272, "step": 61000 }, { "epoch": 1.7750239986037175, "grad_norm": 11.375, "learning_rate": 4.083254525774363e-06, "loss": 1.4585, "step": 61020 }, { "epoch": 1.7756057829362655, "grad_norm": 13.125, "learning_rate": 4.081315244101748e-06, "loss": 1.3838, "step": 61040 }, { "epoch": 1.7761875672688134, "grad_norm": 12.6875, "learning_rate": 4.079375962429133e-06, "loss": 1.6207, "step": 61060 }, { "epoch": 1.7767693516013614, "grad_norm": 13.6875, "learning_rate": 4.077436680756518e-06, "loss": 1.4856, "step": 61080 }, { "epoch": 1.7773511359339094, "grad_norm": 13.25, "learning_rate": 4.0754973990839035e-06, "loss": 1.5682, "step": 61100 }, { "epoch": 1.777932920266457, "grad_norm": 14.0625, "learning_rate": 4.0735581174112886e-06, "loss": 1.577, "step": 61120 }, { "epoch": 1.7785147045990053, "grad_norm": 14.8125, "learning_rate": 4.071618835738673e-06, "loss": 1.3515, "step": 61140 }, { "epoch": 1.779096488931553, "grad_norm": 13.125, "learning_rate": 4.069679554066058e-06, "loss": 1.5357, "step": 61160 }, { "epoch": 1.779678273264101, "grad_norm": 12.0, "learning_rate": 4.067740272393443e-06, "loss": 1.4878, "step": 61180 }, { "epoch": 1.780260057596649, "grad_norm": 13.9375, "learning_rate": 4.065800990720828e-06, "loss": 1.5168, "step": 61200 }, { "epoch": 1.7808418419291967, "grad_norm": 13.5625, "learning_rate": 4.063861709048213e-06, "loss": 1.5326, "step": 61220 }, { "epoch": 1.7814236262617449, "grad_norm": 14.25, "learning_rate": 4.061922427375598e-06, "loss": 1.4947, "step": 61240 }, { "epoch": 1.7820054105942926, "grad_norm": 13.25, "learning_rate": 4.059983145702983e-06, "loss": 1.559, "step": 61260 }, { "epoch": 1.7825871949268406, "grad_norm": 14.6875, "learning_rate": 4.0580438640303685e-06, "loss": 1.5615, "step": 61280 }, { "epoch": 1.7831689792593886, "grad_norm": 13.6875, "learning_rate": 4.056104582357754e-06, "loss": 1.6296, "step": 61300 }, { "epoch": 1.7837507635919365, "grad_norm": 15.0, "learning_rate": 4.054165300685139e-06, "loss": 1.4333, "step": 61320 }, { "epoch": 1.7843325479244845, "grad_norm": 13.1875, "learning_rate": 4.052226019012524e-06, "loss": 1.6036, "step": 61340 }, { "epoch": 1.7849143322570322, "grad_norm": 12.625, "learning_rate": 4.050286737339909e-06, "loss": 1.5461, "step": 61360 }, { "epoch": 1.7854961165895804, "grad_norm": 14.25, "learning_rate": 4.048347455667294e-06, "loss": 1.5809, "step": 61380 }, { "epoch": 1.7860779009221281, "grad_norm": 12.75, "learning_rate": 4.046408173994679e-06, "loss": 1.523, "step": 61400 }, { "epoch": 1.786659685254676, "grad_norm": 12.625, "learning_rate": 4.044468892322064e-06, "loss": 1.4993, "step": 61420 }, { "epoch": 1.787241469587224, "grad_norm": 11.375, "learning_rate": 4.042529610649449e-06, "loss": 1.4793, "step": 61440 }, { "epoch": 1.7878232539197718, "grad_norm": 13.625, "learning_rate": 4.040590328976834e-06, "loss": 1.5591, "step": 61460 }, { "epoch": 1.78840503825232, "grad_norm": 14.6875, "learning_rate": 4.0386510473042195e-06, "loss": 1.6111, "step": 61480 }, { "epoch": 1.7889868225848677, "grad_norm": 17.625, "learning_rate": 4.0367117656316046e-06, "loss": 1.503, "step": 61500 }, { "epoch": 1.7895686069174157, "grad_norm": 15.625, "learning_rate": 4.03477248395899e-06, "loss": 1.5823, "step": 61520 }, { "epoch": 1.7901503912499637, "grad_norm": 14.375, "learning_rate": 4.032833202286375e-06, "loss": 1.5218, "step": 61540 }, { "epoch": 1.7907321755825114, "grad_norm": 12.875, "learning_rate": 4.03089392061376e-06, "loss": 1.4664, "step": 61560 }, { "epoch": 1.7913139599150596, "grad_norm": 16.375, "learning_rate": 4.028954638941145e-06, "loss": 1.4916, "step": 61580 }, { "epoch": 1.7918957442476073, "grad_norm": 14.75, "learning_rate": 4.02701535726853e-06, "loss": 1.5355, "step": 61600 }, { "epoch": 1.7924775285801553, "grad_norm": 15.1875, "learning_rate": 4.025076075595915e-06, "loss": 1.5685, "step": 61620 }, { "epoch": 1.7930593129127033, "grad_norm": 14.875, "learning_rate": 4.0231367939233e-06, "loss": 1.6289, "step": 61640 }, { "epoch": 1.7936410972452512, "grad_norm": 12.8125, "learning_rate": 4.021197512250685e-06, "loss": 1.4561, "step": 61660 }, { "epoch": 1.7942228815777992, "grad_norm": 12.0625, "learning_rate": 4.0192582305780704e-06, "loss": 1.4562, "step": 61680 }, { "epoch": 1.794804665910347, "grad_norm": 13.5, "learning_rate": 4.0173189489054555e-06, "loss": 1.4413, "step": 61700 }, { "epoch": 1.7953864502428951, "grad_norm": 15.25, "learning_rate": 4.015379667232841e-06, "loss": 1.4938, "step": 61720 }, { "epoch": 1.7959682345754429, "grad_norm": 11.1875, "learning_rate": 4.013440385560226e-06, "loss": 1.5027, "step": 61740 }, { "epoch": 1.7965500189079908, "grad_norm": 13.875, "learning_rate": 4.011501103887611e-06, "loss": 1.5781, "step": 61760 }, { "epoch": 1.7971318032405388, "grad_norm": 13.8125, "learning_rate": 4.009561822214996e-06, "loss": 1.4809, "step": 61780 }, { "epoch": 1.7977135875730865, "grad_norm": 12.1875, "learning_rate": 4.007622540542381e-06, "loss": 1.5166, "step": 61800 }, { "epoch": 1.7982953719056347, "grad_norm": 7.1875, "learning_rate": 4.005683258869766e-06, "loss": 1.4717, "step": 61820 }, { "epoch": 1.7988771562381825, "grad_norm": 11.0, "learning_rate": 4.003743977197151e-06, "loss": 1.5472, "step": 61840 }, { "epoch": 1.7994589405707304, "grad_norm": 13.25, "learning_rate": 4.001804695524536e-06, "loss": 1.4978, "step": 61860 }, { "epoch": 1.8000407249032784, "grad_norm": 11.0, "learning_rate": 3.999865413851921e-06, "loss": 1.505, "step": 61880 }, { "epoch": 1.8006225092358261, "grad_norm": 17.0, "learning_rate": 3.9979261321793065e-06, "loss": 1.5679, "step": 61900 }, { "epoch": 1.8012042935683743, "grad_norm": 11.375, "learning_rate": 3.995986850506692e-06, "loss": 1.4906, "step": 61920 }, { "epoch": 1.801786077900922, "grad_norm": 12.6875, "learning_rate": 3.994047568834077e-06, "loss": 1.4992, "step": 61940 }, { "epoch": 1.80236786223347, "grad_norm": 10.8125, "learning_rate": 3.992108287161461e-06, "loss": 1.5971, "step": 61960 }, { "epoch": 1.802949646566018, "grad_norm": 12.875, "learning_rate": 3.990169005488846e-06, "loss": 1.4525, "step": 61980 }, { "epoch": 1.803531430898566, "grad_norm": 12.3125, "learning_rate": 3.988229723816231e-06, "loss": 1.5309, "step": 62000 }, { "epoch": 1.804113215231114, "grad_norm": 12.8125, "learning_rate": 3.986290442143616e-06, "loss": 1.5509, "step": 62020 }, { "epoch": 1.8046949995636616, "grad_norm": 11.0, "learning_rate": 3.984351160471001e-06, "loss": 1.5824, "step": 62040 }, { "epoch": 1.8052767838962098, "grad_norm": 14.0, "learning_rate": 3.9824118787983864e-06, "loss": 1.5651, "step": 62060 }, { "epoch": 1.8058585682287576, "grad_norm": 12.1875, "learning_rate": 3.9804725971257715e-06, "loss": 1.5661, "step": 62080 }, { "epoch": 1.8064403525613055, "grad_norm": 11.3125, "learning_rate": 3.978533315453157e-06, "loss": 1.5185, "step": 62100 }, { "epoch": 1.8070221368938535, "grad_norm": 12.0, "learning_rate": 3.976594033780542e-06, "loss": 1.6406, "step": 62120 }, { "epoch": 1.8076039212264012, "grad_norm": 13.25, "learning_rate": 3.974654752107927e-06, "loss": 1.5302, "step": 62140 }, { "epoch": 1.8081857055589494, "grad_norm": 13.5625, "learning_rate": 3.972715470435312e-06, "loss": 1.5314, "step": 62160 }, { "epoch": 1.8087674898914972, "grad_norm": 14.3125, "learning_rate": 3.970776188762697e-06, "loss": 1.6031, "step": 62180 }, { "epoch": 1.8093492742240451, "grad_norm": 12.0, "learning_rate": 3.968836907090082e-06, "loss": 1.4817, "step": 62200 }, { "epoch": 1.809931058556593, "grad_norm": 13.8125, "learning_rate": 3.966897625417467e-06, "loss": 1.585, "step": 62220 }, { "epoch": 1.810512842889141, "grad_norm": 13.75, "learning_rate": 3.9649583437448515e-06, "loss": 1.513, "step": 62240 }, { "epoch": 1.811094627221689, "grad_norm": 10.8125, "learning_rate": 3.9630190620722366e-06, "loss": 1.5597, "step": 62260 }, { "epoch": 1.8116764115542368, "grad_norm": 11.375, "learning_rate": 3.961079780399622e-06, "loss": 1.5187, "step": 62280 }, { "epoch": 1.8122581958867847, "grad_norm": 13.6875, "learning_rate": 3.959140498727007e-06, "loss": 1.4977, "step": 62300 }, { "epoch": 1.8128399802193327, "grad_norm": 11.125, "learning_rate": 3.957201217054392e-06, "loss": 1.5414, "step": 62320 }, { "epoch": 1.8134217645518806, "grad_norm": 13.0, "learning_rate": 3.955261935381777e-06, "loss": 1.5543, "step": 62340 }, { "epoch": 1.8140035488844286, "grad_norm": 11.9375, "learning_rate": 3.953322653709162e-06, "loss": 1.4898, "step": 62360 }, { "epoch": 1.8145853332169763, "grad_norm": 12.9375, "learning_rate": 3.951383372036547e-06, "loss": 1.4591, "step": 62380 }, { "epoch": 1.8151671175495245, "grad_norm": 12.75, "learning_rate": 3.949444090363932e-06, "loss": 1.5138, "step": 62400 }, { "epoch": 1.8157489018820723, "grad_norm": 14.0, "learning_rate": 3.947504808691317e-06, "loss": 1.5751, "step": 62420 }, { "epoch": 1.8163306862146202, "grad_norm": 11.125, "learning_rate": 3.9455655270187024e-06, "loss": 1.4689, "step": 62440 }, { "epoch": 1.8169124705471682, "grad_norm": 12.8125, "learning_rate": 3.9436262453460875e-06, "loss": 1.5339, "step": 62460 }, { "epoch": 1.817494254879716, "grad_norm": 10.0, "learning_rate": 3.941686963673473e-06, "loss": 1.5708, "step": 62480 }, { "epoch": 1.8180760392122641, "grad_norm": 12.6875, "learning_rate": 3.939747682000858e-06, "loss": 1.5428, "step": 62500 }, { "epoch": 1.8186578235448119, "grad_norm": 13.0625, "learning_rate": 3.937808400328243e-06, "loss": 1.4831, "step": 62520 }, { "epoch": 1.8192396078773598, "grad_norm": 13.3125, "learning_rate": 3.935869118655628e-06, "loss": 1.5751, "step": 62540 }, { "epoch": 1.8198213922099078, "grad_norm": 13.0625, "learning_rate": 3.933929836983013e-06, "loss": 1.4526, "step": 62560 }, { "epoch": 1.8204031765424558, "grad_norm": 13.25, "learning_rate": 3.931990555310398e-06, "loss": 1.4694, "step": 62580 }, { "epoch": 1.8209849608750037, "grad_norm": 12.9375, "learning_rate": 3.930051273637783e-06, "loss": 1.5837, "step": 62600 }, { "epoch": 1.8215667452075515, "grad_norm": 13.6875, "learning_rate": 3.928111991965168e-06, "loss": 1.5205, "step": 62620 }, { "epoch": 1.8221485295400996, "grad_norm": 13.3125, "learning_rate": 3.926172710292553e-06, "loss": 1.4634, "step": 62640 }, { "epoch": 1.8227303138726474, "grad_norm": 11.6875, "learning_rate": 3.9242334286199385e-06, "loss": 1.593, "step": 62660 }, { "epoch": 1.8233120982051954, "grad_norm": 13.9375, "learning_rate": 3.922294146947324e-06, "loss": 1.4876, "step": 62680 }, { "epoch": 1.8238938825377433, "grad_norm": 15.5625, "learning_rate": 3.920354865274709e-06, "loss": 1.5392, "step": 62700 }, { "epoch": 1.824475666870291, "grad_norm": 9.25, "learning_rate": 3.918415583602094e-06, "loss": 1.5972, "step": 62720 }, { "epoch": 1.8250574512028392, "grad_norm": 12.8125, "learning_rate": 3.916476301929479e-06, "loss": 1.4216, "step": 62740 }, { "epoch": 1.825639235535387, "grad_norm": 14.3125, "learning_rate": 3.914537020256864e-06, "loss": 1.5259, "step": 62760 }, { "epoch": 1.826221019867935, "grad_norm": 16.0, "learning_rate": 3.912597738584249e-06, "loss": 1.5084, "step": 62780 }, { "epoch": 1.826802804200483, "grad_norm": 12.6875, "learning_rate": 3.910658456911634e-06, "loss": 1.583, "step": 62800 }, { "epoch": 1.8273845885330307, "grad_norm": 13.375, "learning_rate": 3.908719175239019e-06, "loss": 1.5648, "step": 62820 }, { "epoch": 1.8279663728655788, "grad_norm": 13.875, "learning_rate": 3.906779893566404e-06, "loss": 1.5481, "step": 62840 }, { "epoch": 1.8285481571981266, "grad_norm": 13.3125, "learning_rate": 3.9048406118937895e-06, "loss": 1.6228, "step": 62860 }, { "epoch": 1.8291299415306745, "grad_norm": 15.625, "learning_rate": 3.902901330221175e-06, "loss": 1.4922, "step": 62880 }, { "epoch": 1.8297117258632225, "grad_norm": 15.75, "learning_rate": 3.90096204854856e-06, "loss": 1.5099, "step": 62900 }, { "epoch": 1.8302935101957705, "grad_norm": 11.6875, "learning_rate": 3.899022766875945e-06, "loss": 1.5234, "step": 62920 }, { "epoch": 1.8308752945283184, "grad_norm": 13.5625, "learning_rate": 3.89708348520333e-06, "loss": 1.5024, "step": 62940 }, { "epoch": 1.8314570788608662, "grad_norm": 13.5625, "learning_rate": 3.895144203530715e-06, "loss": 1.5681, "step": 62960 }, { "epoch": 1.8320388631934144, "grad_norm": 10.6875, "learning_rate": 3.8932049218581e-06, "loss": 1.5527, "step": 62980 }, { "epoch": 1.832620647525962, "grad_norm": 13.6875, "learning_rate": 3.891265640185485e-06, "loss": 1.4973, "step": 63000 }, { "epoch": 1.83320243185851, "grad_norm": 13.9375, "learning_rate": 3.88932635851287e-06, "loss": 1.5763, "step": 63020 }, { "epoch": 1.833784216191058, "grad_norm": 12.3125, "learning_rate": 3.887387076840255e-06, "loss": 1.5511, "step": 63040 }, { "epoch": 1.8343660005236058, "grad_norm": 14.0625, "learning_rate": 3.88544779516764e-06, "loss": 1.5727, "step": 63060 }, { "epoch": 1.834947784856154, "grad_norm": 21.375, "learning_rate": 3.883508513495025e-06, "loss": 1.4631, "step": 63080 }, { "epoch": 1.8355295691887017, "grad_norm": 15.0, "learning_rate": 3.88156923182241e-06, "loss": 1.5165, "step": 63100 }, { "epoch": 1.8361113535212497, "grad_norm": 12.9375, "learning_rate": 3.879629950149795e-06, "loss": 1.5668, "step": 63120 }, { "epoch": 1.8366931378537976, "grad_norm": 17.125, "learning_rate": 3.87769066847718e-06, "loss": 1.5744, "step": 63140 }, { "epoch": 1.8372749221863454, "grad_norm": 13.9375, "learning_rate": 3.875751386804565e-06, "loss": 1.5037, "step": 63160 }, { "epoch": 1.8378567065188935, "grad_norm": 14.0625, "learning_rate": 3.87381210513195e-06, "loss": 1.5699, "step": 63180 }, { "epoch": 1.8384384908514413, "grad_norm": 15.875, "learning_rate": 3.871872823459335e-06, "loss": 1.5373, "step": 63200 }, { "epoch": 1.8390202751839893, "grad_norm": 11.25, "learning_rate": 3.86993354178672e-06, "loss": 1.5129, "step": 63220 }, { "epoch": 1.8396020595165372, "grad_norm": 14.5625, "learning_rate": 3.8679942601141055e-06, "loss": 1.499, "step": 63240 }, { "epoch": 1.8401838438490852, "grad_norm": 13.625, "learning_rate": 3.866054978441491e-06, "loss": 1.5906, "step": 63260 }, { "epoch": 1.8407656281816331, "grad_norm": 12.75, "learning_rate": 3.864115696768876e-06, "loss": 1.466, "step": 63280 }, { "epoch": 1.8413474125141809, "grad_norm": 12.25, "learning_rate": 3.862176415096261e-06, "loss": 1.5761, "step": 63300 }, { "epoch": 1.841929196846729, "grad_norm": 13.75, "learning_rate": 3.860237133423646e-06, "loss": 1.4375, "step": 63320 }, { "epoch": 1.8425109811792768, "grad_norm": 11.5, "learning_rate": 3.858297851751031e-06, "loss": 1.4889, "step": 63340 }, { "epoch": 1.8430927655118248, "grad_norm": 14.125, "learning_rate": 3.856358570078416e-06, "loss": 1.5614, "step": 63360 }, { "epoch": 1.8436745498443727, "grad_norm": 13.6875, "learning_rate": 3.854419288405801e-06, "loss": 1.5795, "step": 63380 }, { "epoch": 1.8442563341769205, "grad_norm": 16.375, "learning_rate": 3.852480006733186e-06, "loss": 1.5741, "step": 63400 }, { "epoch": 1.8448381185094687, "grad_norm": 11.4375, "learning_rate": 3.850540725060571e-06, "loss": 1.5355, "step": 63420 }, { "epoch": 1.8454199028420164, "grad_norm": 12.3125, "learning_rate": 3.8486014433879565e-06, "loss": 1.4689, "step": 63440 }, { "epoch": 1.8460016871745644, "grad_norm": 12.3125, "learning_rate": 3.8466621617153416e-06, "loss": 1.5845, "step": 63460 }, { "epoch": 1.8465834715071123, "grad_norm": 13.0625, "learning_rate": 3.844722880042727e-06, "loss": 1.5011, "step": 63480 }, { "epoch": 1.8471652558396603, "grad_norm": 14.6875, "learning_rate": 3.842783598370112e-06, "loss": 1.5878, "step": 63500 }, { "epoch": 1.8477470401722083, "grad_norm": 11.8125, "learning_rate": 3.840844316697497e-06, "loss": 1.536, "step": 63520 }, { "epoch": 1.848328824504756, "grad_norm": 13.625, "learning_rate": 3.838905035024882e-06, "loss": 1.619, "step": 63540 }, { "epoch": 1.848910608837304, "grad_norm": 14.8125, "learning_rate": 3.836965753352267e-06, "loss": 1.5491, "step": 63560 }, { "epoch": 1.849492393169852, "grad_norm": 14.6875, "learning_rate": 3.835026471679652e-06, "loss": 1.4322, "step": 63580 }, { "epoch": 1.8500741775023999, "grad_norm": 10.125, "learning_rate": 3.833087190007037e-06, "loss": 1.5235, "step": 63600 }, { "epoch": 1.8506559618349478, "grad_norm": 10.375, "learning_rate": 3.831147908334422e-06, "loss": 1.4756, "step": 63620 }, { "epoch": 1.8512377461674956, "grad_norm": 14.1875, "learning_rate": 3.8292086266618074e-06, "loss": 1.5428, "step": 63640 }, { "epoch": 1.8518195305000438, "grad_norm": 12.8125, "learning_rate": 3.8272693449891925e-06, "loss": 1.4258, "step": 63660 }, { "epoch": 1.8524013148325915, "grad_norm": 14.4375, "learning_rate": 3.825330063316578e-06, "loss": 1.5605, "step": 63680 }, { "epoch": 1.8529830991651395, "grad_norm": 11.9375, "learning_rate": 3.823390781643963e-06, "loss": 1.6128, "step": 63700 }, { "epoch": 1.8535648834976874, "grad_norm": 14.125, "learning_rate": 3.821451499971348e-06, "loss": 1.4939, "step": 63720 }, { "epoch": 1.8541466678302352, "grad_norm": 14.125, "learning_rate": 3.819512218298733e-06, "loss": 1.5789, "step": 63740 }, { "epoch": 1.8547284521627834, "grad_norm": 13.6875, "learning_rate": 3.817572936626118e-06, "loss": 1.5197, "step": 63760 }, { "epoch": 1.855310236495331, "grad_norm": 13.9375, "learning_rate": 3.815633654953503e-06, "loss": 1.5028, "step": 63780 }, { "epoch": 1.855892020827879, "grad_norm": 11.125, "learning_rate": 3.813694373280888e-06, "loss": 1.5733, "step": 63800 }, { "epoch": 1.856473805160427, "grad_norm": 13.375, "learning_rate": 3.811755091608273e-06, "loss": 1.4623, "step": 63820 }, { "epoch": 1.857055589492975, "grad_norm": 11.75, "learning_rate": 3.809815809935658e-06, "loss": 1.5711, "step": 63840 }, { "epoch": 1.857637373825523, "grad_norm": 13.625, "learning_rate": 3.807876528263043e-06, "loss": 1.5969, "step": 63860 }, { "epoch": 1.8582191581580707, "grad_norm": 13.0625, "learning_rate": 3.805937246590428e-06, "loss": 1.593, "step": 63880 }, { "epoch": 1.858800942490619, "grad_norm": 14.375, "learning_rate": 3.8039979649178133e-06, "loss": 1.5419, "step": 63900 }, { "epoch": 1.8593827268231666, "grad_norm": 12.375, "learning_rate": 3.8020586832451984e-06, "loss": 1.5262, "step": 63920 }, { "epoch": 1.8599645111557146, "grad_norm": 14.125, "learning_rate": 3.8001194015725835e-06, "loss": 1.5379, "step": 63940 }, { "epoch": 1.8605462954882626, "grad_norm": 14.0, "learning_rate": 3.7981801198999686e-06, "loss": 1.4929, "step": 63960 }, { "epoch": 1.8611280798208103, "grad_norm": 12.5, "learning_rate": 3.7962408382273537e-06, "loss": 1.5153, "step": 63980 }, { "epoch": 1.8617098641533585, "grad_norm": 14.3125, "learning_rate": 3.7943015565547388e-06, "loss": 1.4638, "step": 64000 }, { "epoch": 1.8622916484859062, "grad_norm": 13.1875, "learning_rate": 3.792362274882124e-06, "loss": 1.5608, "step": 64020 }, { "epoch": 1.8628734328184542, "grad_norm": 14.0, "learning_rate": 3.790422993209509e-06, "loss": 1.5902, "step": 64040 }, { "epoch": 1.8634552171510022, "grad_norm": 13.125, "learning_rate": 3.788483711536894e-06, "loss": 1.4654, "step": 64060 }, { "epoch": 1.86403700148355, "grad_norm": 13.625, "learning_rate": 3.7865444298642783e-06, "loss": 1.5492, "step": 64080 }, { "epoch": 1.864618785816098, "grad_norm": 13.5625, "learning_rate": 3.7846051481916634e-06, "loss": 1.5703, "step": 64100 }, { "epoch": 1.8652005701486458, "grad_norm": 12.4375, "learning_rate": 3.7826658665190485e-06, "loss": 1.4844, "step": 64120 }, { "epoch": 1.8657823544811938, "grad_norm": 13.75, "learning_rate": 3.7807265848464336e-06, "loss": 1.5887, "step": 64140 }, { "epoch": 1.8663641388137417, "grad_norm": 15.25, "learning_rate": 3.7787873031738187e-06, "loss": 1.5731, "step": 64160 }, { "epoch": 1.8669459231462897, "grad_norm": 15.875, "learning_rate": 3.776848021501204e-06, "loss": 1.5551, "step": 64180 }, { "epoch": 1.8675277074788377, "grad_norm": 14.125, "learning_rate": 3.774908739828589e-06, "loss": 1.5833, "step": 64200 }, { "epoch": 1.8681094918113854, "grad_norm": 13.9375, "learning_rate": 3.772969458155974e-06, "loss": 1.5592, "step": 64220 }, { "epoch": 1.8686912761439336, "grad_norm": 11.1875, "learning_rate": 3.771030176483359e-06, "loss": 1.4486, "step": 64240 }, { "epoch": 1.8692730604764813, "grad_norm": 12.4375, "learning_rate": 3.769090894810744e-06, "loss": 1.5575, "step": 64260 }, { "epoch": 1.8698548448090293, "grad_norm": 14.0625, "learning_rate": 3.7671516131381293e-06, "loss": 1.4613, "step": 64280 }, { "epoch": 1.8704366291415773, "grad_norm": 13.375, "learning_rate": 3.7652123314655144e-06, "loss": 1.5542, "step": 64300 }, { "epoch": 1.871018413474125, "grad_norm": 14.4375, "learning_rate": 3.7632730497928995e-06, "loss": 1.449, "step": 64320 }, { "epoch": 1.8716001978066732, "grad_norm": 14.25, "learning_rate": 3.7613337681202846e-06, "loss": 1.5446, "step": 64340 }, { "epoch": 1.872181982139221, "grad_norm": 12.0625, "learning_rate": 3.7593944864476693e-06, "loss": 1.5131, "step": 64360 }, { "epoch": 1.872763766471769, "grad_norm": 13.375, "learning_rate": 3.7574552047750544e-06, "loss": 1.4918, "step": 64380 }, { "epoch": 1.8733455508043169, "grad_norm": 14.25, "learning_rate": 3.7555159231024395e-06, "loss": 1.5377, "step": 64400 }, { "epoch": 1.8739273351368646, "grad_norm": 14.125, "learning_rate": 3.7535766414298246e-06, "loss": 1.5263, "step": 64420 }, { "epoch": 1.8745091194694128, "grad_norm": 12.75, "learning_rate": 3.7516373597572097e-06, "loss": 1.5512, "step": 64440 }, { "epoch": 1.8750909038019605, "grad_norm": 12.5625, "learning_rate": 3.7496980780845948e-06, "loss": 1.5967, "step": 64460 }, { "epoch": 1.8756726881345085, "grad_norm": 12.875, "learning_rate": 3.74775879641198e-06, "loss": 1.5346, "step": 64480 }, { "epoch": 1.8762544724670565, "grad_norm": 12.1875, "learning_rate": 3.745819514739365e-06, "loss": 1.5279, "step": 64500 }, { "epoch": 1.8768362567996044, "grad_norm": 14.3125, "learning_rate": 3.74388023306675e-06, "loss": 1.5636, "step": 64520 }, { "epoch": 1.8774180411321524, "grad_norm": 13.625, "learning_rate": 3.741940951394135e-06, "loss": 1.5338, "step": 64540 }, { "epoch": 1.8779998254647001, "grad_norm": 12.1875, "learning_rate": 3.7400016697215202e-06, "loss": 1.5769, "step": 64560 }, { "epoch": 1.8785816097972483, "grad_norm": 13.5, "learning_rate": 3.7380623880489053e-06, "loss": 1.5317, "step": 64580 }, { "epoch": 1.879163394129796, "grad_norm": 14.0625, "learning_rate": 3.7361231063762904e-06, "loss": 1.4931, "step": 64600 }, { "epoch": 1.879745178462344, "grad_norm": 12.5625, "learning_rate": 3.7341838247036755e-06, "loss": 1.5744, "step": 64620 }, { "epoch": 1.880326962794892, "grad_norm": 11.1875, "learning_rate": 3.7322445430310606e-06, "loss": 1.5326, "step": 64640 }, { "epoch": 1.8809087471274397, "grad_norm": 12.5, "learning_rate": 3.7303052613584457e-06, "loss": 1.5492, "step": 64660 }, { "epoch": 1.881490531459988, "grad_norm": 9.9375, "learning_rate": 3.728365979685831e-06, "loss": 1.4694, "step": 64680 }, { "epoch": 1.8820723157925356, "grad_norm": 14.0, "learning_rate": 3.726426698013216e-06, "loss": 1.541, "step": 64700 }, { "epoch": 1.8826541001250836, "grad_norm": 13.9375, "learning_rate": 3.724487416340601e-06, "loss": 1.5134, "step": 64720 }, { "epoch": 1.8832358844576316, "grad_norm": 15.1875, "learning_rate": 3.722548134667986e-06, "loss": 1.5849, "step": 64740 }, { "epoch": 1.8838176687901795, "grad_norm": 14.0, "learning_rate": 3.720608852995371e-06, "loss": 1.4249, "step": 64760 }, { "epoch": 1.8843994531227275, "grad_norm": 13.0, "learning_rate": 3.718669571322756e-06, "loss": 1.5384, "step": 64780 }, { "epoch": 1.8849812374552752, "grad_norm": 13.6875, "learning_rate": 3.716730289650141e-06, "loss": 1.5399, "step": 64800 }, { "epoch": 1.8855630217878232, "grad_norm": 13.0, "learning_rate": 3.714791007977526e-06, "loss": 1.5114, "step": 64820 }, { "epoch": 1.8861448061203712, "grad_norm": 11.4375, "learning_rate": 3.712851726304911e-06, "loss": 1.522, "step": 64840 }, { "epoch": 1.8867265904529191, "grad_norm": 16.125, "learning_rate": 3.7109124446322963e-06, "loss": 1.4984, "step": 64860 }, { "epoch": 1.887308374785467, "grad_norm": 14.5625, "learning_rate": 3.7089731629596814e-06, "loss": 1.4274, "step": 64880 }, { "epoch": 1.8878901591180148, "grad_norm": 11.8125, "learning_rate": 3.7070338812870665e-06, "loss": 1.556, "step": 64900 }, { "epoch": 1.888471943450563, "grad_norm": 13.0625, "learning_rate": 3.7050945996144516e-06, "loss": 1.5648, "step": 64920 }, { "epoch": 1.8890537277831108, "grad_norm": 15.25, "learning_rate": 3.7031553179418367e-06, "loss": 1.6345, "step": 64940 }, { "epoch": 1.8896355121156587, "grad_norm": 13.25, "learning_rate": 3.7012160362692218e-06, "loss": 1.5228, "step": 64960 }, { "epoch": 1.8902172964482067, "grad_norm": 14.4375, "learning_rate": 3.699276754596607e-06, "loss": 1.4963, "step": 64980 }, { "epoch": 1.8907990807807544, "grad_norm": 13.6875, "learning_rate": 3.697337472923992e-06, "loss": 1.5216, "step": 65000 }, { "epoch": 1.8913808651133026, "grad_norm": 14.5625, "learning_rate": 3.695398191251377e-06, "loss": 1.4629, "step": 65020 }, { "epoch": 1.8919626494458504, "grad_norm": 14.8125, "learning_rate": 3.693458909578762e-06, "loss": 1.4844, "step": 65040 }, { "epoch": 1.8925444337783983, "grad_norm": 11.75, "learning_rate": 3.6915196279061473e-06, "loss": 1.6662, "step": 65060 }, { "epoch": 1.8931262181109463, "grad_norm": 16.75, "learning_rate": 3.6895803462335323e-06, "loss": 1.5558, "step": 65080 }, { "epoch": 1.8937080024434942, "grad_norm": 14.8125, "learning_rate": 3.6876410645609174e-06, "loss": 1.5334, "step": 65100 }, { "epoch": 1.8942897867760422, "grad_norm": 13.0, "learning_rate": 3.6857017828883025e-06, "loss": 1.5759, "step": 65120 }, { "epoch": 1.89487157110859, "grad_norm": 10.4375, "learning_rate": 3.6837625012156876e-06, "loss": 1.5059, "step": 65140 }, { "epoch": 1.8954533554411381, "grad_norm": 11.8125, "learning_rate": 3.6818232195430727e-06, "loss": 1.5883, "step": 65160 }, { "epoch": 1.8960351397736859, "grad_norm": 12.375, "learning_rate": 3.6798839378704574e-06, "loss": 1.5496, "step": 65180 }, { "epoch": 1.8966169241062338, "grad_norm": 13.125, "learning_rate": 3.6779446561978425e-06, "loss": 1.4951, "step": 65200 }, { "epoch": 1.8971987084387818, "grad_norm": 13.9375, "learning_rate": 3.6760053745252276e-06, "loss": 1.4773, "step": 65220 }, { "epoch": 1.8977804927713295, "grad_norm": 14.25, "learning_rate": 3.6740660928526127e-06, "loss": 1.5271, "step": 65240 }, { "epoch": 1.8983622771038777, "grad_norm": 12.6875, "learning_rate": 3.672126811179998e-06, "loss": 1.5227, "step": 65260 }, { "epoch": 1.8989440614364255, "grad_norm": 16.5, "learning_rate": 3.670187529507383e-06, "loss": 1.567, "step": 65280 }, { "epoch": 1.8995258457689734, "grad_norm": 14.625, "learning_rate": 3.668248247834768e-06, "loss": 1.5158, "step": 65300 }, { "epoch": 1.9001076301015214, "grad_norm": 13.4375, "learning_rate": 3.666308966162153e-06, "loss": 1.5731, "step": 65320 }, { "epoch": 1.9006894144340691, "grad_norm": 12.625, "learning_rate": 3.664369684489538e-06, "loss": 1.633, "step": 65340 }, { "epoch": 1.9012711987666173, "grad_norm": 14.0625, "learning_rate": 3.6624304028169233e-06, "loss": 1.466, "step": 65360 }, { "epoch": 1.901852983099165, "grad_norm": 13.6875, "learning_rate": 3.6604911211443084e-06, "loss": 1.5367, "step": 65380 }, { "epoch": 1.902434767431713, "grad_norm": 11.125, "learning_rate": 3.6585518394716935e-06, "loss": 1.5736, "step": 65400 }, { "epoch": 1.903016551764261, "grad_norm": 17.75, "learning_rate": 3.6566125577990786e-06, "loss": 1.5669, "step": 65420 }, { "epoch": 1.903598336096809, "grad_norm": 14.6875, "learning_rate": 3.6546732761264637e-06, "loss": 1.5658, "step": 65440 }, { "epoch": 1.904180120429357, "grad_norm": 12.6875, "learning_rate": 3.6527339944538488e-06, "loss": 1.5102, "step": 65460 }, { "epoch": 1.9047619047619047, "grad_norm": 12.0, "learning_rate": 3.650794712781234e-06, "loss": 1.4242, "step": 65480 }, { "epoch": 1.9053436890944528, "grad_norm": 14.8125, "learning_rate": 3.648855431108619e-06, "loss": 1.58, "step": 65500 }, { "epoch": 1.9059254734270006, "grad_norm": 15.875, "learning_rate": 3.646916149436004e-06, "loss": 1.4414, "step": 65520 }, { "epoch": 1.9065072577595485, "grad_norm": 12.1875, "learning_rate": 3.644976867763389e-06, "loss": 1.5314, "step": 65540 }, { "epoch": 1.9070890420920965, "grad_norm": 14.625, "learning_rate": 3.6430375860907743e-06, "loss": 1.5462, "step": 65560 }, { "epoch": 1.9076708264246443, "grad_norm": 12.75, "learning_rate": 3.641098304418159e-06, "loss": 1.4362, "step": 65580 }, { "epoch": 1.9082526107571924, "grad_norm": 15.4375, "learning_rate": 3.639159022745544e-06, "loss": 1.6175, "step": 65600 }, { "epoch": 1.9088343950897402, "grad_norm": 12.8125, "learning_rate": 3.637219741072929e-06, "loss": 1.4896, "step": 65620 }, { "epoch": 1.9094161794222881, "grad_norm": 14.25, "learning_rate": 3.6352804594003142e-06, "loss": 1.5441, "step": 65640 }, { "epoch": 1.909997963754836, "grad_norm": 12.625, "learning_rate": 3.6333411777276993e-06, "loss": 1.5906, "step": 65660 }, { "epoch": 1.9105797480873838, "grad_norm": 13.6875, "learning_rate": 3.6314018960550844e-06, "loss": 1.5146, "step": 65680 }, { "epoch": 1.911161532419932, "grad_norm": 14.0625, "learning_rate": 3.6294626143824695e-06, "loss": 1.4349, "step": 65700 }, { "epoch": 1.9117433167524798, "grad_norm": 15.5625, "learning_rate": 3.6275233327098546e-06, "loss": 1.5319, "step": 65720 }, { "epoch": 1.9123251010850277, "grad_norm": 14.5, "learning_rate": 3.6255840510372397e-06, "loss": 1.4693, "step": 65740 }, { "epoch": 1.9129068854175757, "grad_norm": 15.5625, "learning_rate": 3.623644769364625e-06, "loss": 1.6036, "step": 65760 }, { "epoch": 1.9134886697501237, "grad_norm": 13.8125, "learning_rate": 3.62170548769201e-06, "loss": 1.5009, "step": 65780 }, { "epoch": 1.9140704540826716, "grad_norm": 15.3125, "learning_rate": 3.619766206019395e-06, "loss": 1.5315, "step": 65800 }, { "epoch": 1.9146522384152194, "grad_norm": 11.875, "learning_rate": 3.61782692434678e-06, "loss": 1.6201, "step": 65820 }, { "epoch": 1.9152340227477676, "grad_norm": 8.375, "learning_rate": 3.615887642674165e-06, "loss": 1.5182, "step": 65840 }, { "epoch": 1.9158158070803153, "grad_norm": 13.9375, "learning_rate": 3.6139483610015503e-06, "loss": 1.5305, "step": 65860 }, { "epoch": 1.9163975914128633, "grad_norm": 11.25, "learning_rate": 3.6120090793289354e-06, "loss": 1.4922, "step": 65880 }, { "epoch": 1.9169793757454112, "grad_norm": 13.375, "learning_rate": 3.6100697976563205e-06, "loss": 1.5899, "step": 65900 }, { "epoch": 1.917561160077959, "grad_norm": 14.4375, "learning_rate": 3.6081305159837056e-06, "loss": 1.4647, "step": 65920 }, { "epoch": 1.9181429444105071, "grad_norm": 12.25, "learning_rate": 3.6061912343110907e-06, "loss": 1.6362, "step": 65940 }, { "epoch": 1.9187247287430549, "grad_norm": 13.9375, "learning_rate": 3.604251952638476e-06, "loss": 1.5538, "step": 65960 }, { "epoch": 1.9193065130756028, "grad_norm": 13.0, "learning_rate": 3.602312670965861e-06, "loss": 1.484, "step": 65980 }, { "epoch": 1.9198882974081508, "grad_norm": 17.125, "learning_rate": 3.6003733892932456e-06, "loss": 1.4788, "step": 66000 }, { "epoch": 1.9204700817406988, "grad_norm": 14.375, "learning_rate": 3.5984341076206307e-06, "loss": 1.5489, "step": 66020 }, { "epoch": 1.9210518660732467, "grad_norm": 12.875, "learning_rate": 3.5964948259480158e-06, "loss": 1.461, "step": 66040 }, { "epoch": 1.9216336504057945, "grad_norm": 13.25, "learning_rate": 3.594555544275401e-06, "loss": 1.5596, "step": 66060 }, { "epoch": 1.9222154347383424, "grad_norm": 12.0625, "learning_rate": 3.592616262602786e-06, "loss": 1.5128, "step": 66080 }, { "epoch": 1.9227972190708904, "grad_norm": 18.25, "learning_rate": 3.590676980930171e-06, "loss": 1.4885, "step": 66100 }, { "epoch": 1.9233790034034384, "grad_norm": 12.375, "learning_rate": 3.588737699257556e-06, "loss": 1.496, "step": 66120 }, { "epoch": 1.9239607877359863, "grad_norm": 15.25, "learning_rate": 3.5867984175849412e-06, "loss": 1.5943, "step": 66140 }, { "epoch": 1.924542572068534, "grad_norm": 13.75, "learning_rate": 3.5848591359123263e-06, "loss": 1.5122, "step": 66160 }, { "epoch": 1.9251243564010823, "grad_norm": 11.6875, "learning_rate": 3.5829198542397114e-06, "loss": 1.5545, "step": 66180 }, { "epoch": 1.92570614073363, "grad_norm": 12.0, "learning_rate": 3.580980572567096e-06, "loss": 1.5628, "step": 66200 }, { "epoch": 1.926287925066178, "grad_norm": 12.9375, "learning_rate": 3.579041290894481e-06, "loss": 1.4236, "step": 66220 }, { "epoch": 1.926869709398726, "grad_norm": 15.3125, "learning_rate": 3.5771020092218663e-06, "loss": 1.5351, "step": 66240 }, { "epoch": 1.9274514937312737, "grad_norm": 11.0625, "learning_rate": 3.5751627275492514e-06, "loss": 1.5499, "step": 66260 }, { "epoch": 1.9280332780638219, "grad_norm": 13.1875, "learning_rate": 3.573223445876636e-06, "loss": 1.5883, "step": 66280 }, { "epoch": 1.9286150623963696, "grad_norm": 12.0625, "learning_rate": 3.571284164204021e-06, "loss": 1.4578, "step": 66300 }, { "epoch": 1.9291968467289176, "grad_norm": 11.5, "learning_rate": 3.5693448825314063e-06, "loss": 1.4527, "step": 66320 }, { "epoch": 1.9297786310614655, "grad_norm": 13.8125, "learning_rate": 3.5674056008587914e-06, "loss": 1.5393, "step": 66340 }, { "epoch": 1.9303604153940135, "grad_norm": 11.3125, "learning_rate": 3.5654663191861765e-06, "loss": 1.5419, "step": 66360 }, { "epoch": 1.9309421997265614, "grad_norm": 13.875, "learning_rate": 3.5635270375135616e-06, "loss": 1.5344, "step": 66380 }, { "epoch": 1.9315239840591092, "grad_norm": 12.625, "learning_rate": 3.5615877558409467e-06, "loss": 1.5528, "step": 66400 }, { "epoch": 1.9321057683916574, "grad_norm": 13.5625, "learning_rate": 3.5596484741683318e-06, "loss": 1.4706, "step": 66420 }, { "epoch": 1.9326875527242051, "grad_norm": 13.0, "learning_rate": 3.557709192495717e-06, "loss": 1.5307, "step": 66440 }, { "epoch": 1.933269337056753, "grad_norm": 14.875, "learning_rate": 3.555769910823102e-06, "loss": 1.5119, "step": 66460 }, { "epoch": 1.933851121389301, "grad_norm": 11.875, "learning_rate": 3.553830629150487e-06, "loss": 1.474, "step": 66480 }, { "epoch": 1.9344329057218488, "grad_norm": 14.5, "learning_rate": 3.551891347477872e-06, "loss": 1.5309, "step": 66500 }, { "epoch": 1.935014690054397, "grad_norm": 12.125, "learning_rate": 3.5499520658052573e-06, "loss": 1.509, "step": 66520 }, { "epoch": 1.9355964743869447, "grad_norm": 14.5625, "learning_rate": 3.5480127841326423e-06, "loss": 1.5758, "step": 66540 }, { "epoch": 1.9361782587194927, "grad_norm": 15.3125, "learning_rate": 3.5460735024600274e-06, "loss": 1.4817, "step": 66560 }, { "epoch": 1.9367600430520406, "grad_norm": 15.125, "learning_rate": 3.5441342207874125e-06, "loss": 1.4302, "step": 66580 }, { "epoch": 1.9373418273845884, "grad_norm": 13.5, "learning_rate": 3.5421949391147976e-06, "loss": 1.5171, "step": 66600 }, { "epoch": 1.9379236117171366, "grad_norm": 13.5, "learning_rate": 3.5402556574421827e-06, "loss": 1.5666, "step": 66620 }, { "epoch": 1.9385053960496843, "grad_norm": 13.5, "learning_rate": 3.538316375769568e-06, "loss": 1.4887, "step": 66640 }, { "epoch": 1.9390871803822323, "grad_norm": 13.0625, "learning_rate": 3.536377094096953e-06, "loss": 1.4681, "step": 66660 }, { "epoch": 1.9396689647147802, "grad_norm": 12.9375, "learning_rate": 3.5344378124243376e-06, "loss": 1.4435, "step": 66680 }, { "epoch": 1.9402507490473282, "grad_norm": 15.0625, "learning_rate": 3.5324985307517227e-06, "loss": 1.4985, "step": 66700 }, { "epoch": 1.9408325333798762, "grad_norm": 12.5, "learning_rate": 3.530559249079108e-06, "loss": 1.4723, "step": 66720 }, { "epoch": 1.941414317712424, "grad_norm": 11.4375, "learning_rate": 3.528619967406493e-06, "loss": 1.5144, "step": 66740 }, { "epoch": 1.941996102044972, "grad_norm": 15.3125, "learning_rate": 3.526680685733878e-06, "loss": 1.5583, "step": 66760 }, { "epoch": 1.9425778863775198, "grad_norm": 15.0, "learning_rate": 3.524741404061263e-06, "loss": 1.5899, "step": 66780 }, { "epoch": 1.9431596707100678, "grad_norm": 13.5, "learning_rate": 3.522802122388648e-06, "loss": 1.5051, "step": 66800 }, { "epoch": 1.9437414550426158, "grad_norm": 14.6875, "learning_rate": 3.5208628407160333e-06, "loss": 1.5192, "step": 66820 }, { "epoch": 1.9443232393751635, "grad_norm": 11.0, "learning_rate": 3.5189235590434184e-06, "loss": 1.5179, "step": 66840 }, { "epoch": 1.9449050237077117, "grad_norm": 13.4375, "learning_rate": 3.5169842773708035e-06, "loss": 1.4844, "step": 66860 }, { "epoch": 1.9454868080402594, "grad_norm": 13.4375, "learning_rate": 3.5150449956981886e-06, "loss": 1.4773, "step": 66880 }, { "epoch": 1.9460685923728074, "grad_norm": 15.4375, "learning_rate": 3.5131057140255737e-06, "loss": 1.4834, "step": 66900 }, { "epoch": 1.9466503767053553, "grad_norm": 13.4375, "learning_rate": 3.5111664323529588e-06, "loss": 1.4458, "step": 66920 }, { "epoch": 1.947232161037903, "grad_norm": 13.6875, "learning_rate": 3.509227150680344e-06, "loss": 1.3961, "step": 66940 }, { "epoch": 1.9478139453704513, "grad_norm": 18.625, "learning_rate": 3.507287869007729e-06, "loss": 1.4644, "step": 66960 }, { "epoch": 1.948395729702999, "grad_norm": 15.125, "learning_rate": 3.505348587335114e-06, "loss": 1.4645, "step": 66980 }, { "epoch": 1.948977514035547, "grad_norm": 12.25, "learning_rate": 3.503409305662499e-06, "loss": 1.5278, "step": 67000 }, { "epoch": 1.949559298368095, "grad_norm": 13.5625, "learning_rate": 3.5014700239898843e-06, "loss": 1.5145, "step": 67020 }, { "epoch": 1.950141082700643, "grad_norm": 14.25, "learning_rate": 3.4995307423172694e-06, "loss": 1.4483, "step": 67040 }, { "epoch": 1.9507228670331909, "grad_norm": 14.8125, "learning_rate": 3.4975914606446545e-06, "loss": 1.5122, "step": 67060 }, { "epoch": 1.9513046513657386, "grad_norm": 12.3125, "learning_rate": 3.4956521789720396e-06, "loss": 1.4802, "step": 67080 }, { "epoch": 1.9518864356982868, "grad_norm": 12.625, "learning_rate": 3.4937128972994242e-06, "loss": 1.5416, "step": 67100 }, { "epoch": 1.9524682200308345, "grad_norm": 12.875, "learning_rate": 3.4917736156268093e-06, "loss": 1.5825, "step": 67120 }, { "epoch": 1.9530500043633825, "grad_norm": 11.1875, "learning_rate": 3.4898343339541944e-06, "loss": 1.4703, "step": 67140 }, { "epoch": 1.9536317886959305, "grad_norm": 14.3125, "learning_rate": 3.4878950522815795e-06, "loss": 1.5021, "step": 67160 }, { "epoch": 1.9542135730284782, "grad_norm": 13.125, "learning_rate": 3.4859557706089646e-06, "loss": 1.5426, "step": 67180 }, { "epoch": 1.9547953573610264, "grad_norm": 13.125, "learning_rate": 3.4840164889363497e-06, "loss": 1.6451, "step": 67200 }, { "epoch": 1.9553771416935741, "grad_norm": 11.9375, "learning_rate": 3.482077207263735e-06, "loss": 1.4643, "step": 67220 }, { "epoch": 1.955958926026122, "grad_norm": 11.875, "learning_rate": 3.48013792559112e-06, "loss": 1.4222, "step": 67240 }, { "epoch": 1.95654071035867, "grad_norm": 13.4375, "learning_rate": 3.478198643918505e-06, "loss": 1.5199, "step": 67260 }, { "epoch": 1.957122494691218, "grad_norm": 14.75, "learning_rate": 3.47625936224589e-06, "loss": 1.52, "step": 67280 }, { "epoch": 1.957704279023766, "grad_norm": 12.8125, "learning_rate": 3.474320080573275e-06, "loss": 1.5264, "step": 67300 }, { "epoch": 1.9582860633563137, "grad_norm": 15.6875, "learning_rate": 3.4723807989006603e-06, "loss": 1.4517, "step": 67320 }, { "epoch": 1.9588678476888617, "grad_norm": 13.3125, "learning_rate": 3.4704415172280454e-06, "loss": 1.4373, "step": 67340 }, { "epoch": 1.9594496320214096, "grad_norm": 11.6875, "learning_rate": 3.4685022355554305e-06, "loss": 1.4871, "step": 67360 }, { "epoch": 1.9600314163539576, "grad_norm": 13.625, "learning_rate": 3.4665629538828156e-06, "loss": 1.456, "step": 67380 }, { "epoch": 1.9606132006865056, "grad_norm": 17.25, "learning_rate": 3.4646236722102007e-06, "loss": 1.6015, "step": 67400 }, { "epoch": 1.9611949850190533, "grad_norm": 13.5, "learning_rate": 3.462684390537586e-06, "loss": 1.439, "step": 67420 }, { "epoch": 1.9617767693516015, "grad_norm": 17.125, "learning_rate": 3.460745108864971e-06, "loss": 1.4945, "step": 67440 }, { "epoch": 1.9623585536841492, "grad_norm": 14.0625, "learning_rate": 3.458805827192356e-06, "loss": 1.4674, "step": 67460 }, { "epoch": 1.9629403380166972, "grad_norm": 12.3125, "learning_rate": 3.456866545519741e-06, "loss": 1.5322, "step": 67480 }, { "epoch": 1.9635221223492452, "grad_norm": 15.0625, "learning_rate": 3.4549272638471258e-06, "loss": 1.472, "step": 67500 }, { "epoch": 1.964103906681793, "grad_norm": 11.125, "learning_rate": 3.452987982174511e-06, "loss": 1.5724, "step": 67520 }, { "epoch": 1.964685691014341, "grad_norm": 14.25, "learning_rate": 3.451048700501896e-06, "loss": 1.6187, "step": 67540 }, { "epoch": 1.9652674753468888, "grad_norm": 12.625, "learning_rate": 3.449109418829281e-06, "loss": 1.4714, "step": 67560 }, { "epoch": 1.9658492596794368, "grad_norm": 12.25, "learning_rate": 3.447170137156666e-06, "loss": 1.4059, "step": 67580 }, { "epoch": 1.9664310440119848, "grad_norm": 13.0625, "learning_rate": 3.4452308554840512e-06, "loss": 1.5153, "step": 67600 }, { "epoch": 1.9670128283445327, "grad_norm": 14.125, "learning_rate": 3.4432915738114363e-06, "loss": 1.4906, "step": 67620 }, { "epoch": 1.9675946126770807, "grad_norm": 14.375, "learning_rate": 3.4413522921388214e-06, "loss": 1.5138, "step": 67640 }, { "epoch": 1.9681763970096284, "grad_norm": 12.9375, "learning_rate": 3.4394130104662065e-06, "loss": 1.4947, "step": 67660 }, { "epoch": 1.9687581813421766, "grad_norm": 13.75, "learning_rate": 3.4374737287935916e-06, "loss": 1.502, "step": 67680 }, { "epoch": 1.9693399656747244, "grad_norm": 14.6875, "learning_rate": 3.4355344471209767e-06, "loss": 1.6162, "step": 67700 }, { "epoch": 1.9699217500072723, "grad_norm": 14.9375, "learning_rate": 3.433595165448362e-06, "loss": 1.5033, "step": 67720 }, { "epoch": 1.9705035343398203, "grad_norm": 13.8125, "learning_rate": 3.431655883775747e-06, "loss": 1.516, "step": 67740 }, { "epoch": 1.971085318672368, "grad_norm": 13.125, "learning_rate": 3.429716602103132e-06, "loss": 1.5823, "step": 67760 }, { "epoch": 1.9716671030049162, "grad_norm": 15.4375, "learning_rate": 3.427777320430517e-06, "loss": 1.5057, "step": 67780 }, { "epoch": 1.972248887337464, "grad_norm": 13.75, "learning_rate": 3.4258380387579022e-06, "loss": 1.4956, "step": 67800 }, { "epoch": 1.972830671670012, "grad_norm": 16.375, "learning_rate": 3.4238987570852873e-06, "loss": 1.4952, "step": 67820 }, { "epoch": 1.9734124560025599, "grad_norm": 14.75, "learning_rate": 3.4219594754126724e-06, "loss": 1.5201, "step": 67840 }, { "epoch": 1.9739942403351076, "grad_norm": 13.4375, "learning_rate": 3.4200201937400575e-06, "loss": 1.576, "step": 67860 }, { "epoch": 1.9745760246676558, "grad_norm": 12.5625, "learning_rate": 3.4180809120674426e-06, "loss": 1.5281, "step": 67880 }, { "epoch": 1.9751578090002035, "grad_norm": 12.1875, "learning_rate": 3.4161416303948273e-06, "loss": 1.4996, "step": 67900 }, { "epoch": 1.9757395933327515, "grad_norm": 11.8125, "learning_rate": 3.4142023487222124e-06, "loss": 1.5158, "step": 67920 }, { "epoch": 1.9763213776652995, "grad_norm": 14.3125, "learning_rate": 3.4122630670495975e-06, "loss": 1.4658, "step": 67940 }, { "epoch": 1.9769031619978474, "grad_norm": 13.4375, "learning_rate": 3.4103237853769826e-06, "loss": 1.46, "step": 67960 }, { "epoch": 1.9774849463303954, "grad_norm": 17.875, "learning_rate": 3.4083845037043677e-06, "loss": 1.5656, "step": 67980 }, { "epoch": 1.9780667306629431, "grad_norm": 12.4375, "learning_rate": 3.4064452220317528e-06, "loss": 1.4976, "step": 68000 }, { "epoch": 1.9786485149954913, "grad_norm": 14.1875, "learning_rate": 3.404505940359138e-06, "loss": 1.4997, "step": 68020 }, { "epoch": 1.979230299328039, "grad_norm": 17.0, "learning_rate": 3.402566658686523e-06, "loss": 1.4584, "step": 68040 }, { "epoch": 1.979812083660587, "grad_norm": 14.625, "learning_rate": 3.400627377013908e-06, "loss": 1.5321, "step": 68060 }, { "epoch": 1.980393867993135, "grad_norm": 12.1875, "learning_rate": 3.398688095341293e-06, "loss": 1.3941, "step": 68080 }, { "epoch": 1.9809756523256827, "grad_norm": 12.1875, "learning_rate": 3.3967488136686783e-06, "loss": 1.5388, "step": 68100 }, { "epoch": 1.981557436658231, "grad_norm": 14.0, "learning_rate": 3.3948095319960634e-06, "loss": 1.5299, "step": 68120 }, { "epoch": 1.9821392209907787, "grad_norm": 12.6875, "learning_rate": 3.3928702503234485e-06, "loss": 1.4172, "step": 68140 }, { "epoch": 1.9827210053233266, "grad_norm": 15.5625, "learning_rate": 3.3909309686508335e-06, "loss": 1.4967, "step": 68160 }, { "epoch": 1.9833027896558746, "grad_norm": 14.4375, "learning_rate": 3.3889916869782186e-06, "loss": 1.4214, "step": 68180 }, { "epoch": 1.9838845739884223, "grad_norm": 12.75, "learning_rate": 3.3870524053056037e-06, "loss": 1.5038, "step": 68200 }, { "epoch": 1.9844663583209705, "grad_norm": 13.75, "learning_rate": 3.385113123632989e-06, "loss": 1.568, "step": 68220 }, { "epoch": 1.9850481426535183, "grad_norm": 14.3125, "learning_rate": 3.383173841960374e-06, "loss": 1.5062, "step": 68240 }, { "epoch": 1.9856299269860662, "grad_norm": 11.6875, "learning_rate": 3.381234560287759e-06, "loss": 1.4475, "step": 68260 }, { "epoch": 1.9862117113186142, "grad_norm": 13.0, "learning_rate": 3.379295278615144e-06, "loss": 1.4878, "step": 68280 }, { "epoch": 1.9867934956511621, "grad_norm": 12.375, "learning_rate": 3.3773559969425292e-06, "loss": 1.5715, "step": 68300 }, { "epoch": 1.98737527998371, "grad_norm": 15.0, "learning_rate": 3.375416715269914e-06, "loss": 1.4876, "step": 68320 }, { "epoch": 1.9879570643162578, "grad_norm": 16.5, "learning_rate": 3.3734774335972986e-06, "loss": 1.4497, "step": 68340 }, { "epoch": 1.988538848648806, "grad_norm": 15.4375, "learning_rate": 3.3715381519246837e-06, "loss": 1.4886, "step": 68360 }, { "epoch": 1.9891206329813538, "grad_norm": 11.6875, "learning_rate": 3.3695988702520688e-06, "loss": 1.4935, "step": 68380 }, { "epoch": 1.9897024173139017, "grad_norm": 13.5, "learning_rate": 3.367659588579454e-06, "loss": 1.4687, "step": 68400 }, { "epoch": 1.9902842016464497, "grad_norm": 14.0, "learning_rate": 3.365720306906839e-06, "loss": 1.5841, "step": 68420 }, { "epoch": 1.9908659859789974, "grad_norm": 12.125, "learning_rate": 3.363781025234224e-06, "loss": 1.5813, "step": 68440 }, { "epoch": 1.9914477703115456, "grad_norm": 13.875, "learning_rate": 3.361841743561609e-06, "loss": 1.5345, "step": 68460 }, { "epoch": 1.9920295546440934, "grad_norm": 13.0, "learning_rate": 3.3599024618889943e-06, "loss": 1.5137, "step": 68480 }, { "epoch": 1.9926113389766413, "grad_norm": 13.4375, "learning_rate": 3.3579631802163794e-06, "loss": 1.4971, "step": 68500 }, { "epoch": 1.9931931233091893, "grad_norm": 14.375, "learning_rate": 3.3560238985437645e-06, "loss": 1.4585, "step": 68520 }, { "epoch": 1.9937749076417373, "grad_norm": 9.375, "learning_rate": 3.3540846168711496e-06, "loss": 1.4688, "step": 68540 }, { "epoch": 1.9943566919742852, "grad_norm": 14.6875, "learning_rate": 3.3521453351985347e-06, "loss": 1.5335, "step": 68560 }, { "epoch": 1.994938476306833, "grad_norm": 13.3125, "learning_rate": 3.3502060535259197e-06, "loss": 1.4753, "step": 68580 }, { "epoch": 1.995520260639381, "grad_norm": 14.6875, "learning_rate": 3.3482667718533044e-06, "loss": 1.5276, "step": 68600 }, { "epoch": 1.996102044971929, "grad_norm": 10.625, "learning_rate": 3.3463274901806895e-06, "loss": 1.589, "step": 68620 }, { "epoch": 1.9966838293044769, "grad_norm": 12.3125, "learning_rate": 3.3443882085080746e-06, "loss": 1.448, "step": 68640 }, { "epoch": 1.9972656136370248, "grad_norm": 11.3125, "learning_rate": 3.3424489268354597e-06, "loss": 1.5355, "step": 68660 }, { "epoch": 1.9978473979695726, "grad_norm": 11.4375, "learning_rate": 3.340509645162845e-06, "loss": 1.462, "step": 68680 }, { "epoch": 1.9984291823021207, "grad_norm": 14.375, "learning_rate": 3.33857036349023e-06, "loss": 1.47, "step": 68700 }, { "epoch": 1.9990109666346685, "grad_norm": 12.6875, "learning_rate": 3.336631081817615e-06, "loss": 1.4833, "step": 68720 }, { "epoch": 1.9995927509672164, "grad_norm": 12.6875, "learning_rate": 3.334691800145e-06, "loss": 1.5627, "step": 68740 }, { "epoch": 2.0001745352997644, "grad_norm": 12.6875, "learning_rate": 3.332752518472385e-06, "loss": 1.4391, "step": 68760 }, { "epoch": 2.000756319632312, "grad_norm": 16.375, "learning_rate": 3.3308132367997703e-06, "loss": 1.5977, "step": 68780 }, { "epoch": 2.0013381039648603, "grad_norm": 12.875, "learning_rate": 3.3288739551271554e-06, "loss": 1.4505, "step": 68800 }, { "epoch": 2.001919888297408, "grad_norm": 12.75, "learning_rate": 3.3269346734545405e-06, "loss": 1.5455, "step": 68820 }, { "epoch": 2.0025016726299563, "grad_norm": 12.9375, "learning_rate": 3.3249953917819256e-06, "loss": 1.4766, "step": 68840 }, { "epoch": 2.003083456962504, "grad_norm": 13.8125, "learning_rate": 3.3230561101093107e-06, "loss": 1.6074, "step": 68860 }, { "epoch": 2.0036652412950517, "grad_norm": 14.3125, "learning_rate": 3.3211168284366958e-06, "loss": 1.4583, "step": 68880 }, { "epoch": 2.0042470256276, "grad_norm": 14.8125, "learning_rate": 3.319177546764081e-06, "loss": 1.5819, "step": 68900 }, { "epoch": 2.0048288099601477, "grad_norm": 12.5, "learning_rate": 3.317238265091466e-06, "loss": 1.4428, "step": 68920 }, { "epoch": 2.005410594292696, "grad_norm": 13.0625, "learning_rate": 3.315298983418851e-06, "loss": 1.513, "step": 68940 }, { "epoch": 2.0059923786252436, "grad_norm": 12.5, "learning_rate": 3.313359701746236e-06, "loss": 1.4663, "step": 68960 }, { "epoch": 2.0065741629577913, "grad_norm": 11.375, "learning_rate": 3.3114204200736213e-06, "loss": 1.5026, "step": 68980 }, { "epoch": 2.0071559472903395, "grad_norm": 14.9375, "learning_rate": 3.309481138401006e-06, "loss": 1.5613, "step": 69000 }, { "epoch": 2.0077377316228873, "grad_norm": 13.625, "learning_rate": 3.307541856728391e-06, "loss": 1.4829, "step": 69020 }, { "epoch": 2.0083195159554355, "grad_norm": 12.1875, "learning_rate": 3.305602575055776e-06, "loss": 1.4124, "step": 69040 }, { "epoch": 2.008901300287983, "grad_norm": 18.625, "learning_rate": 3.3036632933831612e-06, "loss": 1.4377, "step": 69060 }, { "epoch": 2.0094830846205314, "grad_norm": 14.3125, "learning_rate": 3.3017240117105463e-06, "loss": 1.5308, "step": 69080 }, { "epoch": 2.010064868953079, "grad_norm": 16.625, "learning_rate": 3.2997847300379314e-06, "loss": 1.4723, "step": 69100 }, { "epoch": 2.010646653285627, "grad_norm": 17.25, "learning_rate": 3.2978454483653165e-06, "loss": 1.5134, "step": 69120 }, { "epoch": 2.011228437618175, "grad_norm": 15.125, "learning_rate": 3.2959061666927016e-06, "loss": 1.4609, "step": 69140 }, { "epoch": 2.011810221950723, "grad_norm": 13.625, "learning_rate": 3.2939668850200867e-06, "loss": 1.5012, "step": 69160 }, { "epoch": 2.012392006283271, "grad_norm": 16.75, "learning_rate": 3.292027603347472e-06, "loss": 1.5458, "step": 69180 }, { "epoch": 2.0129737906158187, "grad_norm": 15.0, "learning_rate": 3.290088321674857e-06, "loss": 1.5486, "step": 69200 }, { "epoch": 2.0135555749483665, "grad_norm": 17.625, "learning_rate": 3.288149040002242e-06, "loss": 1.4424, "step": 69220 }, { "epoch": 2.0141373592809146, "grad_norm": 14.4375, "learning_rate": 3.286209758329627e-06, "loss": 1.5176, "step": 69240 }, { "epoch": 2.0147191436134624, "grad_norm": 11.3125, "learning_rate": 3.2842704766570122e-06, "loss": 1.5829, "step": 69260 }, { "epoch": 2.0153009279460106, "grad_norm": 16.625, "learning_rate": 3.2823311949843973e-06, "loss": 1.4862, "step": 69280 }, { "epoch": 2.0158827122785583, "grad_norm": 15.4375, "learning_rate": 3.2803919133117824e-06, "loss": 1.4982, "step": 69300 }, { "epoch": 2.016464496611106, "grad_norm": 11.8125, "learning_rate": 3.2784526316391675e-06, "loss": 1.5102, "step": 69320 }, { "epoch": 2.0170462809436542, "grad_norm": 13.625, "learning_rate": 3.2765133499665526e-06, "loss": 1.488, "step": 69340 }, { "epoch": 2.017628065276202, "grad_norm": 14.9375, "learning_rate": 3.2745740682939377e-06, "loss": 1.5736, "step": 69360 }, { "epoch": 2.01820984960875, "grad_norm": 16.25, "learning_rate": 3.272634786621323e-06, "loss": 1.519, "step": 69380 }, { "epoch": 2.018791633941298, "grad_norm": 14.25, "learning_rate": 3.270695504948708e-06, "loss": 1.4977, "step": 69400 }, { "epoch": 2.019373418273846, "grad_norm": 15.25, "learning_rate": 3.2687562232760926e-06, "loss": 1.4406, "step": 69420 }, { "epoch": 2.019955202606394, "grad_norm": 14.625, "learning_rate": 3.2668169416034777e-06, "loss": 1.5004, "step": 69440 }, { "epoch": 2.0205369869389416, "grad_norm": 12.6875, "learning_rate": 3.2648776599308628e-06, "loss": 1.4982, "step": 69460 }, { "epoch": 2.0211187712714898, "grad_norm": 15.3125, "learning_rate": 3.262938378258248e-06, "loss": 1.535, "step": 69480 }, { "epoch": 2.0217005556040375, "grad_norm": 14.8125, "learning_rate": 3.260999096585633e-06, "loss": 1.5355, "step": 69500 }, { "epoch": 2.0222823399365857, "grad_norm": 11.3125, "learning_rate": 3.259059814913018e-06, "loss": 1.5343, "step": 69520 }, { "epoch": 2.0228641242691334, "grad_norm": 12.875, "learning_rate": 3.257120533240403e-06, "loss": 1.503, "step": 69540 }, { "epoch": 2.023445908601681, "grad_norm": 14.1875, "learning_rate": 3.2551812515677883e-06, "loss": 1.5212, "step": 69560 }, { "epoch": 2.0240276929342293, "grad_norm": 13.875, "learning_rate": 3.2532419698951734e-06, "loss": 1.4834, "step": 69580 }, { "epoch": 2.024609477266777, "grad_norm": 12.8125, "learning_rate": 3.2513026882225584e-06, "loss": 1.5213, "step": 69600 }, { "epoch": 2.0251912615993253, "grad_norm": 13.5, "learning_rate": 3.2493634065499435e-06, "loss": 1.5377, "step": 69620 }, { "epoch": 2.025773045931873, "grad_norm": 13.5, "learning_rate": 3.2474241248773286e-06, "loss": 1.4808, "step": 69640 }, { "epoch": 2.026354830264421, "grad_norm": 14.75, "learning_rate": 3.2454848432047137e-06, "loss": 1.4268, "step": 69660 }, { "epoch": 2.026936614596969, "grad_norm": 13.125, "learning_rate": 3.243545561532099e-06, "loss": 1.5044, "step": 69680 }, { "epoch": 2.0275183989295167, "grad_norm": 14.4375, "learning_rate": 3.241606279859484e-06, "loss": 1.52, "step": 69700 }, { "epoch": 2.028100183262065, "grad_norm": 12.8125, "learning_rate": 3.239666998186869e-06, "loss": 1.4395, "step": 69720 }, { "epoch": 2.0286819675946126, "grad_norm": 12.25, "learning_rate": 3.237727716514254e-06, "loss": 1.3982, "step": 69740 }, { "epoch": 2.029263751927161, "grad_norm": 13.0, "learning_rate": 3.2357884348416392e-06, "loss": 1.4692, "step": 69760 }, { "epoch": 2.0298455362597085, "grad_norm": 11.625, "learning_rate": 3.2338491531690243e-06, "loss": 1.412, "step": 69780 }, { "epoch": 2.0304273205922563, "grad_norm": 13.875, "learning_rate": 3.2319098714964094e-06, "loss": 1.5024, "step": 69800 }, { "epoch": 2.0310091049248045, "grad_norm": 10.6875, "learning_rate": 3.229970589823794e-06, "loss": 1.5416, "step": 69820 }, { "epoch": 2.031590889257352, "grad_norm": 13.0, "learning_rate": 3.228031308151179e-06, "loss": 1.4766, "step": 69840 }, { "epoch": 2.0321726735899004, "grad_norm": 15.4375, "learning_rate": 3.2260920264785643e-06, "loss": 1.4842, "step": 69860 }, { "epoch": 2.032754457922448, "grad_norm": 12.25, "learning_rate": 3.2241527448059494e-06, "loss": 1.483, "step": 69880 }, { "epoch": 2.033336242254996, "grad_norm": 10.75, "learning_rate": 3.2222134631333345e-06, "loss": 1.4638, "step": 69900 }, { "epoch": 2.033918026587544, "grad_norm": 12.3125, "learning_rate": 3.2202741814607196e-06, "loss": 1.5076, "step": 69920 }, { "epoch": 2.034499810920092, "grad_norm": 12.9375, "learning_rate": 3.2183348997881047e-06, "loss": 1.4741, "step": 69940 }, { "epoch": 2.03508159525264, "grad_norm": 12.375, "learning_rate": 3.2163956181154898e-06, "loss": 1.4623, "step": 69960 }, { "epoch": 2.0356633795851877, "grad_norm": 15.25, "learning_rate": 3.214456336442875e-06, "loss": 1.6044, "step": 69980 }, { "epoch": 2.036245163917736, "grad_norm": 13.4375, "learning_rate": 3.21251705477026e-06, "loss": 1.4792, "step": 70000 }, { "epoch": 2.0368269482502837, "grad_norm": 13.375, "learning_rate": 3.210577773097645e-06, "loss": 1.5712, "step": 70020 }, { "epoch": 2.0374087325828314, "grad_norm": 11.25, "learning_rate": 3.20863849142503e-06, "loss": 1.4752, "step": 70040 }, { "epoch": 2.0379905169153796, "grad_norm": 14.3125, "learning_rate": 3.2066992097524153e-06, "loss": 1.5129, "step": 70060 }, { "epoch": 2.0385723012479273, "grad_norm": 13.25, "learning_rate": 3.2047599280798004e-06, "loss": 1.5771, "step": 70080 }, { "epoch": 2.0391540855804755, "grad_norm": 12.4375, "learning_rate": 3.2028206464071855e-06, "loss": 1.4345, "step": 70100 }, { "epoch": 2.0397358699130232, "grad_norm": 12.625, "learning_rate": 3.2008813647345706e-06, "loss": 1.4438, "step": 70120 }, { "epoch": 2.040317654245571, "grad_norm": 12.75, "learning_rate": 3.1989420830619557e-06, "loss": 1.5879, "step": 70140 }, { "epoch": 2.040899438578119, "grad_norm": 14.125, "learning_rate": 3.1970028013893408e-06, "loss": 1.5241, "step": 70160 }, { "epoch": 2.041481222910667, "grad_norm": 14.3125, "learning_rate": 3.195063519716726e-06, "loss": 1.6103, "step": 70180 }, { "epoch": 2.042063007243215, "grad_norm": 14.125, "learning_rate": 3.193124238044111e-06, "loss": 1.4937, "step": 70200 }, { "epoch": 2.042644791575763, "grad_norm": 12.0, "learning_rate": 3.1911849563714956e-06, "loss": 1.47, "step": 70220 }, { "epoch": 2.0432265759083106, "grad_norm": 13.75, "learning_rate": 3.1892456746988807e-06, "loss": 1.5478, "step": 70240 }, { "epoch": 2.0438083602408588, "grad_norm": 14.6875, "learning_rate": 3.187306393026266e-06, "loss": 1.4608, "step": 70260 }, { "epoch": 2.0443901445734065, "grad_norm": 15.8125, "learning_rate": 3.185367111353651e-06, "loss": 1.545, "step": 70280 }, { "epoch": 2.0449719289059547, "grad_norm": 13.25, "learning_rate": 3.183427829681036e-06, "loss": 1.5346, "step": 70300 }, { "epoch": 2.0455537132385024, "grad_norm": 13.875, "learning_rate": 3.181488548008421e-06, "loss": 1.4697, "step": 70320 }, { "epoch": 2.0461354975710506, "grad_norm": 14.5625, "learning_rate": 3.179549266335806e-06, "loss": 1.4163, "step": 70340 }, { "epoch": 2.0467172819035984, "grad_norm": 15.6875, "learning_rate": 3.1776099846631913e-06, "loss": 1.4977, "step": 70360 }, { "epoch": 2.047299066236146, "grad_norm": 15.5, "learning_rate": 3.1756707029905764e-06, "loss": 1.6234, "step": 70380 }, { "epoch": 2.0478808505686943, "grad_norm": 14.375, "learning_rate": 3.1737314213179615e-06, "loss": 1.4976, "step": 70400 }, { "epoch": 2.048462634901242, "grad_norm": 13.875, "learning_rate": 3.1717921396453466e-06, "loss": 1.5693, "step": 70420 }, { "epoch": 2.04904441923379, "grad_norm": 12.5625, "learning_rate": 3.1698528579727317e-06, "loss": 1.5112, "step": 70440 }, { "epoch": 2.049626203566338, "grad_norm": 13.5, "learning_rate": 3.1679135763001164e-06, "loss": 1.4604, "step": 70460 }, { "epoch": 2.0502079878988857, "grad_norm": 13.625, "learning_rate": 3.1659742946275015e-06, "loss": 1.4615, "step": 70480 }, { "epoch": 2.050789772231434, "grad_norm": 13.875, "learning_rate": 3.164035012954886e-06, "loss": 1.5743, "step": 70500 }, { "epoch": 2.0513715565639816, "grad_norm": 13.375, "learning_rate": 3.1620957312822712e-06, "loss": 1.4923, "step": 70520 }, { "epoch": 2.05195334089653, "grad_norm": 13.0, "learning_rate": 3.1601564496096563e-06, "loss": 1.4632, "step": 70540 }, { "epoch": 2.0525351252290776, "grad_norm": 14.625, "learning_rate": 3.1582171679370414e-06, "loss": 1.5149, "step": 70560 }, { "epoch": 2.0531169095616253, "grad_norm": 14.0625, "learning_rate": 3.1562778862644265e-06, "loss": 1.525, "step": 70580 }, { "epoch": 2.0536986938941735, "grad_norm": 14.0, "learning_rate": 3.1543386045918116e-06, "loss": 1.5286, "step": 70600 }, { "epoch": 2.054280478226721, "grad_norm": 14.25, "learning_rate": 3.1523993229191967e-06, "loss": 1.4856, "step": 70620 }, { "epoch": 2.0548622625592694, "grad_norm": 14.4375, "learning_rate": 3.150460041246582e-06, "loss": 1.4911, "step": 70640 }, { "epoch": 2.055444046891817, "grad_norm": 13.5, "learning_rate": 3.148520759573967e-06, "loss": 1.5228, "step": 70660 }, { "epoch": 2.0560258312243653, "grad_norm": 12.6875, "learning_rate": 3.146581477901352e-06, "loss": 1.4383, "step": 70680 }, { "epoch": 2.056607615556913, "grad_norm": 12.8125, "learning_rate": 3.144642196228737e-06, "loss": 1.4915, "step": 70700 }, { "epoch": 2.057189399889461, "grad_norm": 13.0, "learning_rate": 3.1427029145561222e-06, "loss": 1.4782, "step": 70720 }, { "epoch": 2.057771184222009, "grad_norm": 11.4375, "learning_rate": 3.1407636328835073e-06, "loss": 1.485, "step": 70740 }, { "epoch": 2.0583529685545567, "grad_norm": 14.25, "learning_rate": 3.1388243512108924e-06, "loss": 1.6041, "step": 70760 }, { "epoch": 2.058934752887105, "grad_norm": 13.1875, "learning_rate": 3.1368850695382775e-06, "loss": 1.4376, "step": 70780 }, { "epoch": 2.0595165372196527, "grad_norm": 13.8125, "learning_rate": 3.1349457878656626e-06, "loss": 1.4167, "step": 70800 }, { "epoch": 2.0600983215522004, "grad_norm": 13.6875, "learning_rate": 3.1330065061930477e-06, "loss": 1.4912, "step": 70820 }, { "epoch": 2.0606801058847486, "grad_norm": 12.9375, "learning_rate": 3.131067224520433e-06, "loss": 1.5176, "step": 70840 }, { "epoch": 2.0612618902172963, "grad_norm": 13.1875, "learning_rate": 3.129127942847818e-06, "loss": 1.4853, "step": 70860 }, { "epoch": 2.0618436745498445, "grad_norm": 13.4375, "learning_rate": 3.127188661175203e-06, "loss": 1.4644, "step": 70880 }, { "epoch": 2.0624254588823923, "grad_norm": 11.0625, "learning_rate": 3.125249379502588e-06, "loss": 1.4847, "step": 70900 }, { "epoch": 2.0630072432149404, "grad_norm": 12.5, "learning_rate": 3.1233100978299728e-06, "loss": 1.5436, "step": 70920 }, { "epoch": 2.063589027547488, "grad_norm": 12.875, "learning_rate": 3.121370816157358e-06, "loss": 1.4474, "step": 70940 }, { "epoch": 2.064170811880036, "grad_norm": 11.625, "learning_rate": 3.119431534484743e-06, "loss": 1.5005, "step": 70960 }, { "epoch": 2.064752596212584, "grad_norm": 11.8125, "learning_rate": 3.117492252812128e-06, "loss": 1.5289, "step": 70980 }, { "epoch": 2.065334380545132, "grad_norm": 12.625, "learning_rate": 3.115552971139513e-06, "loss": 1.4181, "step": 71000 }, { "epoch": 2.06591616487768, "grad_norm": 10.5, "learning_rate": 3.1136136894668983e-06, "loss": 1.405, "step": 71020 }, { "epoch": 2.066497949210228, "grad_norm": 12.8125, "learning_rate": 3.1116744077942834e-06, "loss": 1.522, "step": 71040 }, { "epoch": 2.0670797335427755, "grad_norm": 15.8125, "learning_rate": 3.1097351261216684e-06, "loss": 1.5006, "step": 71060 }, { "epoch": 2.0676615178753237, "grad_norm": 12.3125, "learning_rate": 3.1077958444490535e-06, "loss": 1.529, "step": 71080 }, { "epoch": 2.0682433022078714, "grad_norm": 14.125, "learning_rate": 3.1058565627764386e-06, "loss": 1.4701, "step": 71100 }, { "epoch": 2.0688250865404196, "grad_norm": 16.0, "learning_rate": 3.1039172811038237e-06, "loss": 1.5067, "step": 71120 }, { "epoch": 2.0694068708729674, "grad_norm": 12.0, "learning_rate": 3.101977999431209e-06, "loss": 1.4489, "step": 71140 }, { "epoch": 2.069988655205515, "grad_norm": 12.125, "learning_rate": 3.100038717758594e-06, "loss": 1.4791, "step": 71160 }, { "epoch": 2.0705704395380633, "grad_norm": 14.25, "learning_rate": 3.098099436085979e-06, "loss": 1.4841, "step": 71180 }, { "epoch": 2.071152223870611, "grad_norm": 11.1875, "learning_rate": 3.096160154413364e-06, "loss": 1.4563, "step": 71200 }, { "epoch": 2.0717340082031592, "grad_norm": 14.125, "learning_rate": 3.0942208727407492e-06, "loss": 1.4879, "step": 71220 }, { "epoch": 2.072315792535707, "grad_norm": 14.875, "learning_rate": 3.0922815910681343e-06, "loss": 1.4111, "step": 71240 }, { "epoch": 2.0728975768682547, "grad_norm": 14.5625, "learning_rate": 3.0903423093955194e-06, "loss": 1.5508, "step": 71260 }, { "epoch": 2.073479361200803, "grad_norm": 14.875, "learning_rate": 3.0884030277229045e-06, "loss": 1.4548, "step": 71280 }, { "epoch": 2.0740611455333506, "grad_norm": 15.0625, "learning_rate": 3.0864637460502896e-06, "loss": 1.5113, "step": 71300 }, { "epoch": 2.074642929865899, "grad_norm": 13.4375, "learning_rate": 3.0845244643776743e-06, "loss": 1.4911, "step": 71320 }, { "epoch": 2.0752247141984466, "grad_norm": 12.1875, "learning_rate": 3.0825851827050594e-06, "loss": 1.5903, "step": 71340 }, { "epoch": 2.0758064985309947, "grad_norm": 14.0, "learning_rate": 3.0806459010324445e-06, "loss": 1.4759, "step": 71360 }, { "epoch": 2.0763882828635425, "grad_norm": 13.0, "learning_rate": 3.0787066193598296e-06, "loss": 1.493, "step": 71380 }, { "epoch": 2.0769700671960902, "grad_norm": 14.125, "learning_rate": 3.0767673376872147e-06, "loss": 1.4959, "step": 71400 }, { "epoch": 2.0775518515286384, "grad_norm": 14.4375, "learning_rate": 3.0748280560145998e-06, "loss": 1.4924, "step": 71420 }, { "epoch": 2.078133635861186, "grad_norm": 12.625, "learning_rate": 3.072888774341985e-06, "loss": 1.4623, "step": 71440 }, { "epoch": 2.0787154201937343, "grad_norm": 15.375, "learning_rate": 3.07094949266937e-06, "loss": 1.4677, "step": 71460 }, { "epoch": 2.079297204526282, "grad_norm": 13.6875, "learning_rate": 3.069010210996755e-06, "loss": 1.4981, "step": 71480 }, { "epoch": 2.07987898885883, "grad_norm": 13.875, "learning_rate": 3.06707092932414e-06, "loss": 1.4813, "step": 71500 }, { "epoch": 2.080460773191378, "grad_norm": 12.1875, "learning_rate": 3.0651316476515253e-06, "loss": 1.454, "step": 71520 }, { "epoch": 2.0810425575239258, "grad_norm": 11.5, "learning_rate": 3.0631923659789104e-06, "loss": 1.5398, "step": 71540 }, { "epoch": 2.081624341856474, "grad_norm": 13.9375, "learning_rate": 3.0612530843062955e-06, "loss": 1.5193, "step": 71560 }, { "epoch": 2.0822061261890217, "grad_norm": 11.875, "learning_rate": 3.0593138026336806e-06, "loss": 1.485, "step": 71580 }, { "epoch": 2.08278791052157, "grad_norm": 12.5625, "learning_rate": 3.0573745209610657e-06, "loss": 1.4731, "step": 71600 }, { "epoch": 2.0833696948541176, "grad_norm": 13.4375, "learning_rate": 3.0554352392884508e-06, "loss": 1.4991, "step": 71620 }, { "epoch": 2.0839514791866653, "grad_norm": 12.8125, "learning_rate": 3.053495957615836e-06, "loss": 1.5984, "step": 71640 }, { "epoch": 2.0845332635192135, "grad_norm": 18.25, "learning_rate": 3.051556675943221e-06, "loss": 1.4401, "step": 71660 }, { "epoch": 2.0851150478517613, "grad_norm": 15.0, "learning_rate": 3.049617394270606e-06, "loss": 1.4982, "step": 71680 }, { "epoch": 2.0856968321843095, "grad_norm": 12.125, "learning_rate": 3.047678112597991e-06, "loss": 1.442, "step": 71700 }, { "epoch": 2.086278616516857, "grad_norm": 14.9375, "learning_rate": 3.0457388309253762e-06, "loss": 1.4435, "step": 71720 }, { "epoch": 2.086860400849405, "grad_norm": 12.875, "learning_rate": 3.043799549252761e-06, "loss": 1.4565, "step": 71740 }, { "epoch": 2.087442185181953, "grad_norm": 16.75, "learning_rate": 3.041860267580146e-06, "loss": 1.5337, "step": 71760 }, { "epoch": 2.088023969514501, "grad_norm": 16.125, "learning_rate": 3.039920985907531e-06, "loss": 1.5205, "step": 71780 }, { "epoch": 2.088605753847049, "grad_norm": 13.5, "learning_rate": 3.037981704234916e-06, "loss": 1.4813, "step": 71800 }, { "epoch": 2.089187538179597, "grad_norm": 15.1875, "learning_rate": 3.0360424225623013e-06, "loss": 1.5243, "step": 71820 }, { "epoch": 2.0897693225121445, "grad_norm": 14.875, "learning_rate": 3.0341031408896864e-06, "loss": 1.4947, "step": 71840 }, { "epoch": 2.0903511068446927, "grad_norm": 14.5625, "learning_rate": 3.0321638592170715e-06, "loss": 1.4967, "step": 71860 }, { "epoch": 2.0909328911772405, "grad_norm": 11.875, "learning_rate": 3.0302245775444566e-06, "loss": 1.527, "step": 71880 }, { "epoch": 2.0915146755097886, "grad_norm": 13.625, "learning_rate": 3.0282852958718417e-06, "loss": 1.4975, "step": 71900 }, { "epoch": 2.0920964598423364, "grad_norm": 14.875, "learning_rate": 3.026346014199227e-06, "loss": 1.535, "step": 71920 }, { "epoch": 2.0926782441748846, "grad_norm": 16.375, "learning_rate": 3.024406732526612e-06, "loss": 1.5116, "step": 71940 }, { "epoch": 2.0932600285074323, "grad_norm": 14.5625, "learning_rate": 3.022467450853997e-06, "loss": 1.5866, "step": 71960 }, { "epoch": 2.09384181283998, "grad_norm": 12.75, "learning_rate": 3.020528169181382e-06, "loss": 1.468, "step": 71980 }, { "epoch": 2.0944235971725282, "grad_norm": 11.3125, "learning_rate": 3.018588887508767e-06, "loss": 1.5545, "step": 72000 }, { "epoch": 2.095005381505076, "grad_norm": 11.6875, "learning_rate": 3.0166496058361523e-06, "loss": 1.5561, "step": 72020 }, { "epoch": 2.095587165837624, "grad_norm": 16.5, "learning_rate": 3.0147103241635374e-06, "loss": 1.4659, "step": 72040 }, { "epoch": 2.096168950170172, "grad_norm": 12.875, "learning_rate": 3.0127710424909225e-06, "loss": 1.5107, "step": 72060 }, { "epoch": 2.0967507345027196, "grad_norm": 12.375, "learning_rate": 3.0108317608183076e-06, "loss": 1.4283, "step": 72080 }, { "epoch": 2.097332518835268, "grad_norm": 12.25, "learning_rate": 3.0088924791456927e-06, "loss": 1.5606, "step": 72100 }, { "epoch": 2.0979143031678156, "grad_norm": 11.6875, "learning_rate": 3.0069531974730778e-06, "loss": 1.5268, "step": 72120 }, { "epoch": 2.0984960875003638, "grad_norm": 13.375, "learning_rate": 3.0050139158004624e-06, "loss": 1.502, "step": 72140 }, { "epoch": 2.0990778718329115, "grad_norm": 14.375, "learning_rate": 3.0030746341278475e-06, "loss": 1.5983, "step": 72160 }, { "epoch": 2.0996596561654597, "grad_norm": 13.5, "learning_rate": 3.0011353524552326e-06, "loss": 1.586, "step": 72180 }, { "epoch": 2.1002414404980074, "grad_norm": 14.625, "learning_rate": 2.9991960707826177e-06, "loss": 1.4499, "step": 72200 }, { "epoch": 2.100823224830555, "grad_norm": 13.1875, "learning_rate": 2.997256789110003e-06, "loss": 1.5428, "step": 72220 }, { "epoch": 2.1014050091631034, "grad_norm": 15.4375, "learning_rate": 2.995317507437388e-06, "loss": 1.4782, "step": 72240 }, { "epoch": 2.101986793495651, "grad_norm": 12.875, "learning_rate": 2.993378225764773e-06, "loss": 1.4888, "step": 72260 }, { "epoch": 2.1025685778281993, "grad_norm": 11.25, "learning_rate": 2.991438944092158e-06, "loss": 1.521, "step": 72280 }, { "epoch": 2.103150362160747, "grad_norm": 14.625, "learning_rate": 2.9894996624195432e-06, "loss": 1.5041, "step": 72300 }, { "epoch": 2.1037321464932948, "grad_norm": 12.6875, "learning_rate": 2.9875603807469283e-06, "loss": 1.4956, "step": 72320 }, { "epoch": 2.104313930825843, "grad_norm": 12.75, "learning_rate": 2.9856210990743134e-06, "loss": 1.4933, "step": 72340 }, { "epoch": 2.1048957151583907, "grad_norm": 12.8125, "learning_rate": 2.9836818174016985e-06, "loss": 1.5132, "step": 72360 }, { "epoch": 2.105477499490939, "grad_norm": 11.75, "learning_rate": 2.9817425357290836e-06, "loss": 1.527, "step": 72380 }, { "epoch": 2.1060592838234866, "grad_norm": 13.8125, "learning_rate": 2.9798032540564687e-06, "loss": 1.4684, "step": 72400 }, { "epoch": 2.1066410681560344, "grad_norm": 12.0, "learning_rate": 2.977863972383854e-06, "loss": 1.5328, "step": 72420 }, { "epoch": 2.1072228524885825, "grad_norm": 14.6875, "learning_rate": 2.975924690711239e-06, "loss": 1.4832, "step": 72440 }, { "epoch": 2.1078046368211303, "grad_norm": 14.75, "learning_rate": 2.973985409038624e-06, "loss": 1.4761, "step": 72460 }, { "epoch": 2.1083864211536785, "grad_norm": 14.125, "learning_rate": 2.972046127366009e-06, "loss": 1.5431, "step": 72480 }, { "epoch": 2.108968205486226, "grad_norm": 15.5, "learning_rate": 2.970106845693394e-06, "loss": 1.5485, "step": 72500 }, { "epoch": 2.109549989818774, "grad_norm": 11.5625, "learning_rate": 2.9681675640207793e-06, "loss": 1.4798, "step": 72520 }, { "epoch": 2.110131774151322, "grad_norm": 15.75, "learning_rate": 2.966228282348164e-06, "loss": 1.5801, "step": 72540 }, { "epoch": 2.11071355848387, "grad_norm": 13.0, "learning_rate": 2.964289000675549e-06, "loss": 1.4505, "step": 72560 }, { "epoch": 2.111295342816418, "grad_norm": 13.5625, "learning_rate": 2.9623497190029337e-06, "loss": 1.4387, "step": 72580 }, { "epoch": 2.111877127148966, "grad_norm": 14.75, "learning_rate": 2.960410437330319e-06, "loss": 1.4776, "step": 72600 }, { "epoch": 2.112458911481514, "grad_norm": 14.75, "learning_rate": 2.958471155657704e-06, "loss": 1.4937, "step": 72620 }, { "epoch": 2.1130406958140617, "grad_norm": 12.6875, "learning_rate": 2.956531873985089e-06, "loss": 1.5073, "step": 72640 }, { "epoch": 2.1136224801466095, "grad_norm": 14.375, "learning_rate": 2.954592592312474e-06, "loss": 1.4216, "step": 72660 }, { "epoch": 2.1142042644791577, "grad_norm": 14.875, "learning_rate": 2.9526533106398592e-06, "loss": 1.4707, "step": 72680 }, { "epoch": 2.1147860488117054, "grad_norm": 15.0625, "learning_rate": 2.9507140289672443e-06, "loss": 1.4336, "step": 72700 }, { "epoch": 2.1153678331442536, "grad_norm": 15.125, "learning_rate": 2.9487747472946294e-06, "loss": 1.4724, "step": 72720 }, { "epoch": 2.1159496174768013, "grad_norm": 15.375, "learning_rate": 2.9468354656220145e-06, "loss": 1.4837, "step": 72740 }, { "epoch": 2.116531401809349, "grad_norm": 15.0625, "learning_rate": 2.9448961839493996e-06, "loss": 1.433, "step": 72760 }, { "epoch": 2.1171131861418973, "grad_norm": 13.5, "learning_rate": 2.9429569022767847e-06, "loss": 1.5166, "step": 72780 }, { "epoch": 2.117694970474445, "grad_norm": 14.625, "learning_rate": 2.94101762060417e-06, "loss": 1.4642, "step": 72800 }, { "epoch": 2.118276754806993, "grad_norm": 13.25, "learning_rate": 2.939078338931555e-06, "loss": 1.4825, "step": 72820 }, { "epoch": 2.118858539139541, "grad_norm": 13.0625, "learning_rate": 2.9371390572589396e-06, "loss": 1.4745, "step": 72840 }, { "epoch": 2.119440323472089, "grad_norm": 10.625, "learning_rate": 2.9351997755863247e-06, "loss": 1.4926, "step": 72860 }, { "epoch": 2.120022107804637, "grad_norm": 12.3125, "learning_rate": 2.9332604939137098e-06, "loss": 1.4766, "step": 72880 }, { "epoch": 2.1206038921371846, "grad_norm": 12.25, "learning_rate": 2.931321212241095e-06, "loss": 1.4237, "step": 72900 }, { "epoch": 2.1211856764697328, "grad_norm": 13.625, "learning_rate": 2.92938193056848e-06, "loss": 1.4522, "step": 72920 }, { "epoch": 2.1217674608022805, "grad_norm": 11.0625, "learning_rate": 2.927442648895865e-06, "loss": 1.4988, "step": 72940 }, { "epoch": 2.1223492451348287, "grad_norm": 12.3125, "learning_rate": 2.92550336722325e-06, "loss": 1.5132, "step": 72960 }, { "epoch": 2.1229310294673764, "grad_norm": 14.75, "learning_rate": 2.9235640855506353e-06, "loss": 1.4619, "step": 72980 }, { "epoch": 2.123512813799924, "grad_norm": 12.1875, "learning_rate": 2.9216248038780204e-06, "loss": 1.4034, "step": 73000 }, { "epoch": 2.1240945981324724, "grad_norm": 14.6875, "learning_rate": 2.9196855222054055e-06, "loss": 1.5266, "step": 73020 }, { "epoch": 2.12467638246502, "grad_norm": 14.5625, "learning_rate": 2.9177462405327906e-06, "loss": 1.5114, "step": 73040 }, { "epoch": 2.1252581667975683, "grad_norm": 14.3125, "learning_rate": 2.9158069588601757e-06, "loss": 1.4811, "step": 73060 }, { "epoch": 2.125839951130116, "grad_norm": 17.25, "learning_rate": 2.9138676771875608e-06, "loss": 1.5085, "step": 73080 }, { "epoch": 2.1264217354626638, "grad_norm": 14.1875, "learning_rate": 2.911928395514946e-06, "loss": 1.4951, "step": 73100 }, { "epoch": 2.127003519795212, "grad_norm": 12.8125, "learning_rate": 2.909989113842331e-06, "loss": 1.4602, "step": 73120 }, { "epoch": 2.1275853041277597, "grad_norm": 14.0, "learning_rate": 2.908049832169716e-06, "loss": 1.4742, "step": 73140 }, { "epoch": 2.128167088460308, "grad_norm": 14.4375, "learning_rate": 2.906110550497101e-06, "loss": 1.5108, "step": 73160 }, { "epoch": 2.1287488727928556, "grad_norm": 13.3125, "learning_rate": 2.9041712688244862e-06, "loss": 1.5393, "step": 73180 }, { "epoch": 2.129330657125404, "grad_norm": 12.625, "learning_rate": 2.9022319871518713e-06, "loss": 1.4763, "step": 73200 }, { "epoch": 2.1299124414579516, "grad_norm": 12.5, "learning_rate": 2.9002927054792564e-06, "loss": 1.4555, "step": 73220 }, { "epoch": 2.1304942257904993, "grad_norm": 13.375, "learning_rate": 2.898353423806641e-06, "loss": 1.5222, "step": 73240 }, { "epoch": 2.1310760101230475, "grad_norm": 15.5, "learning_rate": 2.896414142134026e-06, "loss": 1.5048, "step": 73260 }, { "epoch": 2.1316577944555952, "grad_norm": 15.625, "learning_rate": 2.8944748604614113e-06, "loss": 1.5817, "step": 73280 }, { "epoch": 2.1322395787881434, "grad_norm": 14.125, "learning_rate": 2.8925355787887964e-06, "loss": 1.4889, "step": 73300 }, { "epoch": 2.132821363120691, "grad_norm": 14.1875, "learning_rate": 2.8905962971161815e-06, "loss": 1.5541, "step": 73320 }, { "epoch": 2.133403147453239, "grad_norm": 13.875, "learning_rate": 2.8886570154435666e-06, "loss": 1.5182, "step": 73340 }, { "epoch": 2.133984931785787, "grad_norm": 13.25, "learning_rate": 2.8867177337709517e-06, "loss": 1.5519, "step": 73360 }, { "epoch": 2.134566716118335, "grad_norm": 14.375, "learning_rate": 2.884778452098337e-06, "loss": 1.4621, "step": 73380 }, { "epoch": 2.135148500450883, "grad_norm": 10.0, "learning_rate": 2.882839170425722e-06, "loss": 1.441, "step": 73400 }, { "epoch": 2.1357302847834307, "grad_norm": 12.9375, "learning_rate": 2.880899888753107e-06, "loss": 1.4363, "step": 73420 }, { "epoch": 2.136312069115979, "grad_norm": 15.875, "learning_rate": 2.878960607080492e-06, "loss": 1.5074, "step": 73440 }, { "epoch": 2.1368938534485267, "grad_norm": 14.5, "learning_rate": 2.877021325407877e-06, "loss": 1.4954, "step": 73460 }, { "epoch": 2.1374756377810744, "grad_norm": 17.375, "learning_rate": 2.8750820437352623e-06, "loss": 1.6136, "step": 73480 }, { "epoch": 2.1380574221136226, "grad_norm": 12.9375, "learning_rate": 2.8731427620626474e-06, "loss": 1.4908, "step": 73500 }, { "epoch": 2.1386392064461703, "grad_norm": 13.8125, "learning_rate": 2.8712034803900325e-06, "loss": 1.4283, "step": 73520 }, { "epoch": 2.1392209907787185, "grad_norm": 12.375, "learning_rate": 2.8692641987174176e-06, "loss": 1.4775, "step": 73540 }, { "epoch": 2.1398027751112663, "grad_norm": 14.1875, "learning_rate": 2.8673249170448027e-06, "loss": 1.562, "step": 73560 }, { "epoch": 2.140384559443814, "grad_norm": 14.0, "learning_rate": 2.8653856353721878e-06, "loss": 1.4407, "step": 73580 }, { "epoch": 2.140966343776362, "grad_norm": 12.5625, "learning_rate": 2.863446353699573e-06, "loss": 1.5493, "step": 73600 }, { "epoch": 2.14154812810891, "grad_norm": 12.25, "learning_rate": 2.861507072026958e-06, "loss": 1.4486, "step": 73620 }, { "epoch": 2.142129912441458, "grad_norm": 14.75, "learning_rate": 2.8595677903543426e-06, "loss": 1.5861, "step": 73640 }, { "epoch": 2.142711696774006, "grad_norm": 12.375, "learning_rate": 2.8576285086817277e-06, "loss": 1.5289, "step": 73660 }, { "epoch": 2.1432934811065536, "grad_norm": 14.0, "learning_rate": 2.855689227009113e-06, "loss": 1.5793, "step": 73680 }, { "epoch": 2.143875265439102, "grad_norm": 13.5625, "learning_rate": 2.853749945336498e-06, "loss": 1.481, "step": 73700 }, { "epoch": 2.1444570497716495, "grad_norm": 15.9375, "learning_rate": 2.851810663663883e-06, "loss": 1.4722, "step": 73720 }, { "epoch": 2.1450388341041977, "grad_norm": 12.1875, "learning_rate": 2.849871381991268e-06, "loss": 1.3964, "step": 73740 }, { "epoch": 2.1456206184367455, "grad_norm": 17.0, "learning_rate": 2.8479321003186532e-06, "loss": 1.4954, "step": 73760 }, { "epoch": 2.146202402769293, "grad_norm": 12.8125, "learning_rate": 2.8459928186460383e-06, "loss": 1.5448, "step": 73780 }, { "epoch": 2.1467841871018414, "grad_norm": 13.125, "learning_rate": 2.8440535369734234e-06, "loss": 1.5075, "step": 73800 }, { "epoch": 2.147365971434389, "grad_norm": 14.125, "learning_rate": 2.8421142553008085e-06, "loss": 1.485, "step": 73820 }, { "epoch": 2.1479477557669373, "grad_norm": 12.9375, "learning_rate": 2.8401749736281936e-06, "loss": 1.5189, "step": 73840 }, { "epoch": 2.148529540099485, "grad_norm": 13.5, "learning_rate": 2.8382356919555787e-06, "loss": 1.5812, "step": 73860 }, { "epoch": 2.1491113244320332, "grad_norm": 13.125, "learning_rate": 2.836296410282964e-06, "loss": 1.499, "step": 73880 }, { "epoch": 2.149693108764581, "grad_norm": 15.9375, "learning_rate": 2.834357128610349e-06, "loss": 1.4625, "step": 73900 }, { "epoch": 2.1502748930971287, "grad_norm": 13.1875, "learning_rate": 2.832417846937734e-06, "loss": 1.4651, "step": 73920 }, { "epoch": 2.150856677429677, "grad_norm": 10.4375, "learning_rate": 2.830478565265119e-06, "loss": 1.5115, "step": 73940 }, { "epoch": 2.1514384617622246, "grad_norm": 12.1875, "learning_rate": 2.828539283592504e-06, "loss": 1.4763, "step": 73960 }, { "epoch": 2.152020246094773, "grad_norm": 11.625, "learning_rate": 2.8266000019198893e-06, "loss": 1.5031, "step": 73980 }, { "epoch": 2.1526020304273206, "grad_norm": 14.6875, "learning_rate": 2.8246607202472744e-06, "loss": 1.5051, "step": 74000 }, { "epoch": 2.1531838147598683, "grad_norm": 14.875, "learning_rate": 2.8227214385746595e-06, "loss": 1.4666, "step": 74020 }, { "epoch": 2.1537655990924165, "grad_norm": 12.25, "learning_rate": 2.8207821569020446e-06, "loss": 1.4348, "step": 74040 }, { "epoch": 2.1543473834249642, "grad_norm": 13.0, "learning_rate": 2.8188428752294293e-06, "loss": 1.4955, "step": 74060 }, { "epoch": 2.1549291677575124, "grad_norm": 16.25, "learning_rate": 2.8169035935568144e-06, "loss": 1.5198, "step": 74080 }, { "epoch": 2.15551095209006, "grad_norm": 13.5, "learning_rate": 2.8149643118841995e-06, "loss": 1.4427, "step": 74100 }, { "epoch": 2.1560927364226083, "grad_norm": 13.625, "learning_rate": 2.8130250302115845e-06, "loss": 1.4765, "step": 74120 }, { "epoch": 2.156674520755156, "grad_norm": 11.125, "learning_rate": 2.8110857485389696e-06, "loss": 1.4595, "step": 74140 }, { "epoch": 2.157256305087704, "grad_norm": 13.5625, "learning_rate": 2.8091464668663547e-06, "loss": 1.5147, "step": 74160 }, { "epoch": 2.157838089420252, "grad_norm": 12.8125, "learning_rate": 2.80720718519374e-06, "loss": 1.5748, "step": 74180 }, { "epoch": 2.1584198737527998, "grad_norm": 13.4375, "learning_rate": 2.805267903521125e-06, "loss": 1.5331, "step": 74200 }, { "epoch": 2.159001658085348, "grad_norm": 14.4375, "learning_rate": 2.80332862184851e-06, "loss": 1.4877, "step": 74220 }, { "epoch": 2.1595834424178957, "grad_norm": 13.25, "learning_rate": 2.801389340175895e-06, "loss": 1.4491, "step": 74240 }, { "epoch": 2.1601652267504434, "grad_norm": 14.125, "learning_rate": 2.7994500585032802e-06, "loss": 1.4192, "step": 74260 }, { "epoch": 2.1607470110829916, "grad_norm": 15.125, "learning_rate": 2.7975107768306653e-06, "loss": 1.5257, "step": 74280 }, { "epoch": 2.1613287954155393, "grad_norm": 13.75, "learning_rate": 2.7955714951580504e-06, "loss": 1.4681, "step": 74300 }, { "epoch": 2.1619105797480875, "grad_norm": 17.0, "learning_rate": 2.7936322134854355e-06, "loss": 1.5076, "step": 74320 }, { "epoch": 2.1624923640806353, "grad_norm": 12.625, "learning_rate": 2.7916929318128206e-06, "loss": 1.5422, "step": 74340 }, { "epoch": 2.163074148413183, "grad_norm": 13.9375, "learning_rate": 2.7897536501402057e-06, "loss": 1.5043, "step": 74360 }, { "epoch": 2.163655932745731, "grad_norm": 13.5, "learning_rate": 2.787814368467591e-06, "loss": 1.5271, "step": 74380 }, { "epoch": 2.164237717078279, "grad_norm": 13.875, "learning_rate": 2.785875086794976e-06, "loss": 1.3912, "step": 74400 }, { "epoch": 2.164819501410827, "grad_norm": 20.125, "learning_rate": 2.783935805122361e-06, "loss": 1.5108, "step": 74420 }, { "epoch": 2.165401285743375, "grad_norm": 15.6875, "learning_rate": 2.781996523449746e-06, "loss": 1.5373, "step": 74440 }, { "epoch": 2.165983070075923, "grad_norm": 12.25, "learning_rate": 2.7800572417771308e-06, "loss": 1.5474, "step": 74460 }, { "epoch": 2.166564854408471, "grad_norm": 13.6875, "learning_rate": 2.778117960104516e-06, "loss": 1.4762, "step": 74480 }, { "epoch": 2.1671466387410185, "grad_norm": 13.1875, "learning_rate": 2.776178678431901e-06, "loss": 1.485, "step": 74500 }, { "epoch": 2.1677284230735667, "grad_norm": 13.375, "learning_rate": 2.774239396759286e-06, "loss": 1.4827, "step": 74520 }, { "epoch": 2.1683102074061145, "grad_norm": 15.0, "learning_rate": 2.772300115086671e-06, "loss": 1.4605, "step": 74540 }, { "epoch": 2.1688919917386627, "grad_norm": 14.1875, "learning_rate": 2.7703608334140563e-06, "loss": 1.4337, "step": 74560 }, { "epoch": 2.1694737760712104, "grad_norm": 13.625, "learning_rate": 2.7684215517414414e-06, "loss": 1.4334, "step": 74580 }, { "epoch": 2.170055560403758, "grad_norm": 13.1875, "learning_rate": 2.7664822700688265e-06, "loss": 1.5381, "step": 74600 }, { "epoch": 2.1706373447363063, "grad_norm": 14.0625, "learning_rate": 2.7645429883962116e-06, "loss": 1.4843, "step": 74620 }, { "epoch": 2.171219129068854, "grad_norm": 14.5, "learning_rate": 2.7626037067235967e-06, "loss": 1.5029, "step": 74640 }, { "epoch": 2.1718009134014022, "grad_norm": 11.8125, "learning_rate": 2.7606644250509818e-06, "loss": 1.4978, "step": 74660 }, { "epoch": 2.17238269773395, "grad_norm": 13.625, "learning_rate": 2.758725143378367e-06, "loss": 1.537, "step": 74680 }, { "epoch": 2.172964482066498, "grad_norm": 14.5, "learning_rate": 2.7567858617057515e-06, "loss": 1.5458, "step": 74700 }, { "epoch": 2.173546266399046, "grad_norm": 15.875, "learning_rate": 2.7548465800331366e-06, "loss": 1.544, "step": 74720 }, { "epoch": 2.1741280507315937, "grad_norm": 12.625, "learning_rate": 2.7529072983605213e-06, "loss": 1.4829, "step": 74740 }, { "epoch": 2.174709835064142, "grad_norm": 15.5, "learning_rate": 2.7509680166879064e-06, "loss": 1.5048, "step": 74760 }, { "epoch": 2.1752916193966896, "grad_norm": 13.3125, "learning_rate": 2.7490287350152915e-06, "loss": 1.5169, "step": 74780 }, { "epoch": 2.1758734037292378, "grad_norm": 12.1875, "learning_rate": 2.7470894533426766e-06, "loss": 1.5291, "step": 74800 }, { "epoch": 2.1764551880617855, "grad_norm": 16.125, "learning_rate": 2.7451501716700617e-06, "loss": 1.5344, "step": 74820 }, { "epoch": 2.1770369723943332, "grad_norm": 11.375, "learning_rate": 2.743210889997447e-06, "loss": 1.558, "step": 74840 }, { "epoch": 2.1776187567268814, "grad_norm": 10.875, "learning_rate": 2.741271608324832e-06, "loss": 1.4821, "step": 74860 }, { "epoch": 2.178200541059429, "grad_norm": 13.4375, "learning_rate": 2.739332326652217e-06, "loss": 1.4888, "step": 74880 }, { "epoch": 2.1787823253919774, "grad_norm": 14.8125, "learning_rate": 2.737393044979602e-06, "loss": 1.5181, "step": 74900 }, { "epoch": 2.179364109724525, "grad_norm": 13.625, "learning_rate": 2.735453763306987e-06, "loss": 1.5534, "step": 74920 }, { "epoch": 2.179945894057073, "grad_norm": 15.8125, "learning_rate": 2.7335144816343723e-06, "loss": 1.4914, "step": 74940 }, { "epoch": 2.180527678389621, "grad_norm": 12.5, "learning_rate": 2.7315751999617574e-06, "loss": 1.4862, "step": 74960 }, { "epoch": 2.1811094627221688, "grad_norm": 22.5, "learning_rate": 2.7296359182891425e-06, "loss": 1.5262, "step": 74980 }, { "epoch": 2.181691247054717, "grad_norm": 13.8125, "learning_rate": 2.7276966366165276e-06, "loss": 1.4733, "step": 75000 }, { "epoch": 2.1822730313872647, "grad_norm": 13.1875, "learning_rate": 2.7257573549439127e-06, "loss": 1.3873, "step": 75020 }, { "epoch": 2.1828548157198124, "grad_norm": 11.375, "learning_rate": 2.7238180732712978e-06, "loss": 1.5585, "step": 75040 }, { "epoch": 2.1834366000523606, "grad_norm": 18.25, "learning_rate": 2.721878791598683e-06, "loss": 1.5439, "step": 75060 }, { "epoch": 2.1840183843849084, "grad_norm": 12.5, "learning_rate": 2.719939509926068e-06, "loss": 1.4403, "step": 75080 }, { "epoch": 2.1846001687174565, "grad_norm": 12.625, "learning_rate": 2.718000228253453e-06, "loss": 1.4441, "step": 75100 }, { "epoch": 2.1851819530500043, "grad_norm": 14.0625, "learning_rate": 2.716060946580838e-06, "loss": 1.6107, "step": 75120 }, { "epoch": 2.1857637373825525, "grad_norm": 13.0, "learning_rate": 2.7141216649082233e-06, "loss": 1.4784, "step": 75140 }, { "epoch": 2.1863455217151, "grad_norm": 15.6875, "learning_rate": 2.712182383235608e-06, "loss": 1.5381, "step": 75160 }, { "epoch": 2.186927306047648, "grad_norm": 13.1875, "learning_rate": 2.710243101562993e-06, "loss": 1.4521, "step": 75180 }, { "epoch": 2.187509090380196, "grad_norm": 11.9375, "learning_rate": 2.708303819890378e-06, "loss": 1.4395, "step": 75200 }, { "epoch": 2.188090874712744, "grad_norm": 12.75, "learning_rate": 2.7063645382177632e-06, "loss": 1.5815, "step": 75220 }, { "epoch": 2.188672659045292, "grad_norm": 10.625, "learning_rate": 2.7044252565451483e-06, "loss": 1.4308, "step": 75240 }, { "epoch": 2.18925444337784, "grad_norm": 14.75, "learning_rate": 2.7024859748725334e-06, "loss": 1.5653, "step": 75260 }, { "epoch": 2.189836227710388, "grad_norm": 12.25, "learning_rate": 2.7005466931999185e-06, "loss": 1.5253, "step": 75280 }, { "epoch": 2.1904180120429357, "grad_norm": 14.25, "learning_rate": 2.6986074115273036e-06, "loss": 1.4762, "step": 75300 }, { "epoch": 2.1909997963754835, "grad_norm": 11.1875, "learning_rate": 2.6966681298546887e-06, "loss": 1.5231, "step": 75320 }, { "epoch": 2.1915815807080317, "grad_norm": 12.4375, "learning_rate": 2.694728848182074e-06, "loss": 1.566, "step": 75340 }, { "epoch": 2.1921633650405794, "grad_norm": 11.75, "learning_rate": 2.692789566509459e-06, "loss": 1.5473, "step": 75360 }, { "epoch": 2.1927451493731276, "grad_norm": 13.625, "learning_rate": 2.690850284836844e-06, "loss": 1.4593, "step": 75380 }, { "epoch": 2.1933269337056753, "grad_norm": 14.3125, "learning_rate": 2.688911003164229e-06, "loss": 1.4661, "step": 75400 }, { "epoch": 2.193908718038223, "grad_norm": 13.3125, "learning_rate": 2.686971721491614e-06, "loss": 1.4729, "step": 75420 }, { "epoch": 2.1944905023707713, "grad_norm": 14.4375, "learning_rate": 2.6850324398189993e-06, "loss": 1.4975, "step": 75440 }, { "epoch": 2.195072286703319, "grad_norm": 11.375, "learning_rate": 2.6830931581463844e-06, "loss": 1.4666, "step": 75460 }, { "epoch": 2.195654071035867, "grad_norm": 14.4375, "learning_rate": 2.6811538764737695e-06, "loss": 1.4428, "step": 75480 }, { "epoch": 2.196235855368415, "grad_norm": 14.0625, "learning_rate": 2.6792145948011546e-06, "loss": 1.5145, "step": 75500 }, { "epoch": 2.1968176397009627, "grad_norm": 15.25, "learning_rate": 2.6772753131285397e-06, "loss": 1.5086, "step": 75520 }, { "epoch": 2.197399424033511, "grad_norm": 13.5, "learning_rate": 2.6753360314559248e-06, "loss": 1.4997, "step": 75540 }, { "epoch": 2.1979812083660586, "grad_norm": 14.3125, "learning_rate": 2.6733967497833095e-06, "loss": 1.4753, "step": 75560 }, { "epoch": 2.1985629926986068, "grad_norm": 12.75, "learning_rate": 2.6714574681106945e-06, "loss": 1.5529, "step": 75580 }, { "epoch": 2.1991447770311545, "grad_norm": 13.4375, "learning_rate": 2.6695181864380796e-06, "loss": 1.5082, "step": 75600 }, { "epoch": 2.1997265613637023, "grad_norm": 12.0625, "learning_rate": 2.6675789047654647e-06, "loss": 1.6554, "step": 75620 }, { "epoch": 2.2003083456962504, "grad_norm": 14.5, "learning_rate": 2.66563962309285e-06, "loss": 1.5155, "step": 75640 }, { "epoch": 2.200890130028798, "grad_norm": 10.625, "learning_rate": 2.663700341420235e-06, "loss": 1.4859, "step": 75660 }, { "epoch": 2.2014719143613464, "grad_norm": 13.0625, "learning_rate": 2.66176105974762e-06, "loss": 1.4973, "step": 75680 }, { "epoch": 2.202053698693894, "grad_norm": 11.9375, "learning_rate": 2.659821778075005e-06, "loss": 1.4262, "step": 75700 }, { "epoch": 2.2026354830264423, "grad_norm": 14.0625, "learning_rate": 2.6578824964023902e-06, "loss": 1.4614, "step": 75720 }, { "epoch": 2.20321726735899, "grad_norm": 12.0, "learning_rate": 2.6559432147297753e-06, "loss": 1.5203, "step": 75740 }, { "epoch": 2.203799051691538, "grad_norm": 13.0625, "learning_rate": 2.6540039330571604e-06, "loss": 1.5362, "step": 75760 }, { "epoch": 2.204380836024086, "grad_norm": 12.8125, "learning_rate": 2.6520646513845455e-06, "loss": 1.5148, "step": 75780 }, { "epoch": 2.2049626203566337, "grad_norm": 8.5625, "learning_rate": 2.6501253697119306e-06, "loss": 1.4596, "step": 75800 }, { "epoch": 2.205544404689182, "grad_norm": 14.0, "learning_rate": 2.6481860880393157e-06, "loss": 1.5355, "step": 75820 }, { "epoch": 2.2061261890217296, "grad_norm": 14.75, "learning_rate": 2.646246806366701e-06, "loss": 1.5136, "step": 75840 }, { "epoch": 2.2067079733542774, "grad_norm": 11.4375, "learning_rate": 2.644307524694086e-06, "loss": 1.4803, "step": 75860 }, { "epoch": 2.2072897576868256, "grad_norm": 12.9375, "learning_rate": 2.642368243021471e-06, "loss": 1.5108, "step": 75880 }, { "epoch": 2.2078715420193733, "grad_norm": 13.1875, "learning_rate": 2.640428961348856e-06, "loss": 1.4315, "step": 75900 }, { "epoch": 2.2084533263519215, "grad_norm": 12.3125, "learning_rate": 2.638489679676241e-06, "loss": 1.487, "step": 75920 }, { "epoch": 2.2090351106844692, "grad_norm": 11.9375, "learning_rate": 2.6365503980036263e-06, "loss": 1.486, "step": 75940 }, { "epoch": 2.2096168950170174, "grad_norm": 11.9375, "learning_rate": 2.634611116331011e-06, "loss": 1.574, "step": 75960 }, { "epoch": 2.210198679349565, "grad_norm": 13.8125, "learning_rate": 2.632671834658396e-06, "loss": 1.4623, "step": 75980 }, { "epoch": 2.210780463682113, "grad_norm": 13.125, "learning_rate": 2.630732552985781e-06, "loss": 1.4845, "step": 76000 }, { "epoch": 2.211362248014661, "grad_norm": 16.125, "learning_rate": 2.6287932713131663e-06, "loss": 1.4634, "step": 76020 }, { "epoch": 2.211944032347209, "grad_norm": 15.5625, "learning_rate": 2.6268539896405514e-06, "loss": 1.4881, "step": 76040 }, { "epoch": 2.212525816679757, "grad_norm": 14.8125, "learning_rate": 2.6249147079679365e-06, "loss": 1.5763, "step": 76060 }, { "epoch": 2.2131076010123047, "grad_norm": 10.8125, "learning_rate": 2.6229754262953216e-06, "loss": 1.3982, "step": 76080 }, { "epoch": 2.2136893853448525, "grad_norm": 11.9375, "learning_rate": 2.6210361446227067e-06, "loss": 1.5016, "step": 76100 }, { "epoch": 2.2142711696774007, "grad_norm": 13.5625, "learning_rate": 2.6190968629500918e-06, "loss": 1.5301, "step": 76120 }, { "epoch": 2.2148529540099484, "grad_norm": 14.25, "learning_rate": 2.617157581277477e-06, "loss": 1.5179, "step": 76140 }, { "epoch": 2.2154347383424966, "grad_norm": 10.9375, "learning_rate": 2.615218299604862e-06, "loss": 1.4687, "step": 76160 }, { "epoch": 2.2160165226750443, "grad_norm": 12.75, "learning_rate": 2.613279017932247e-06, "loss": 1.4994, "step": 76180 }, { "epoch": 2.216598307007592, "grad_norm": 12.4375, "learning_rate": 2.611339736259632e-06, "loss": 1.4692, "step": 76200 }, { "epoch": 2.2171800913401403, "grad_norm": 13.875, "learning_rate": 2.6094004545870172e-06, "loss": 1.5165, "step": 76220 }, { "epoch": 2.217761875672688, "grad_norm": 13.1875, "learning_rate": 2.6074611729144023e-06, "loss": 1.4748, "step": 76240 }, { "epoch": 2.218343660005236, "grad_norm": 14.625, "learning_rate": 2.6055218912417874e-06, "loss": 1.4563, "step": 76260 }, { "epoch": 2.218925444337784, "grad_norm": 13.0625, "learning_rate": 2.6035826095691725e-06, "loss": 1.5219, "step": 76280 }, { "epoch": 2.2195072286703317, "grad_norm": 14.0, "learning_rate": 2.6016433278965576e-06, "loss": 1.5895, "step": 76300 }, { "epoch": 2.22008901300288, "grad_norm": 14.5625, "learning_rate": 2.5997040462239427e-06, "loss": 1.534, "step": 76320 }, { "epoch": 2.2206707973354276, "grad_norm": 13.3125, "learning_rate": 2.597764764551328e-06, "loss": 1.4324, "step": 76340 }, { "epoch": 2.221252581667976, "grad_norm": 13.5, "learning_rate": 2.595825482878713e-06, "loss": 1.5725, "step": 76360 }, { "epoch": 2.2218343660005235, "grad_norm": 15.125, "learning_rate": 2.5938862012060976e-06, "loss": 1.4665, "step": 76380 }, { "epoch": 2.2224161503330717, "grad_norm": 15.875, "learning_rate": 2.5919469195334827e-06, "loss": 1.5412, "step": 76400 }, { "epoch": 2.2229979346656195, "grad_norm": 12.875, "learning_rate": 2.590007637860868e-06, "loss": 1.452, "step": 76420 }, { "epoch": 2.223579718998167, "grad_norm": 12.9375, "learning_rate": 2.588068356188253e-06, "loss": 1.5128, "step": 76440 }, { "epoch": 2.2241615033307154, "grad_norm": 13.0625, "learning_rate": 2.586129074515638e-06, "loss": 1.5441, "step": 76460 }, { "epoch": 2.224743287663263, "grad_norm": 11.625, "learning_rate": 2.584189792843023e-06, "loss": 1.5348, "step": 76480 }, { "epoch": 2.2253250719958113, "grad_norm": 12.625, "learning_rate": 2.582250511170408e-06, "loss": 1.4825, "step": 76500 }, { "epoch": 2.225906856328359, "grad_norm": 12.3125, "learning_rate": 2.5803112294977933e-06, "loss": 1.5192, "step": 76520 }, { "epoch": 2.2264886406609072, "grad_norm": 15.9375, "learning_rate": 2.5783719478251784e-06, "loss": 1.5104, "step": 76540 }, { "epoch": 2.227070424993455, "grad_norm": 12.4375, "learning_rate": 2.5764326661525635e-06, "loss": 1.4994, "step": 76560 }, { "epoch": 2.2276522093260027, "grad_norm": 14.5, "learning_rate": 2.5744933844799486e-06, "loss": 1.5483, "step": 76580 }, { "epoch": 2.228233993658551, "grad_norm": 16.75, "learning_rate": 2.5725541028073337e-06, "loss": 1.4975, "step": 76600 }, { "epoch": 2.2288157779910986, "grad_norm": 13.0, "learning_rate": 2.5706148211347188e-06, "loss": 1.4466, "step": 76620 }, { "epoch": 2.229397562323647, "grad_norm": 12.0, "learning_rate": 2.568675539462104e-06, "loss": 1.444, "step": 76640 }, { "epoch": 2.2299793466561946, "grad_norm": 15.0, "learning_rate": 2.566736257789489e-06, "loss": 1.4896, "step": 76660 }, { "epoch": 2.2305611309887423, "grad_norm": 13.5625, "learning_rate": 2.564796976116874e-06, "loss": 1.4873, "step": 76680 }, { "epoch": 2.2311429153212905, "grad_norm": 15.0625, "learning_rate": 2.562857694444259e-06, "loss": 1.4281, "step": 76700 }, { "epoch": 2.2317246996538382, "grad_norm": 13.8125, "learning_rate": 2.5609184127716443e-06, "loss": 1.5257, "step": 76720 }, { "epoch": 2.2323064839863864, "grad_norm": 14.6875, "learning_rate": 2.5589791310990294e-06, "loss": 1.4407, "step": 76740 }, { "epoch": 2.232888268318934, "grad_norm": 12.8125, "learning_rate": 2.5570398494264145e-06, "loss": 1.4499, "step": 76760 }, { "epoch": 2.233470052651482, "grad_norm": 16.25, "learning_rate": 2.555100567753799e-06, "loss": 1.4619, "step": 76780 }, { "epoch": 2.23405183698403, "grad_norm": 13.5625, "learning_rate": 2.5531612860811842e-06, "loss": 1.5441, "step": 76800 }, { "epoch": 2.234633621316578, "grad_norm": 11.125, "learning_rate": 2.551222004408569e-06, "loss": 1.5077, "step": 76820 }, { "epoch": 2.235215405649126, "grad_norm": 12.125, "learning_rate": 2.549282722735954e-06, "loss": 1.5183, "step": 76840 }, { "epoch": 2.2357971899816738, "grad_norm": 14.3125, "learning_rate": 2.547343441063339e-06, "loss": 1.4919, "step": 76860 }, { "epoch": 2.2363789743142215, "grad_norm": 16.0, "learning_rate": 2.545404159390724e-06, "loss": 1.544, "step": 76880 }, { "epoch": 2.2369607586467697, "grad_norm": 13.4375, "learning_rate": 2.5434648777181093e-06, "loss": 1.4385, "step": 76900 }, { "epoch": 2.2375425429793174, "grad_norm": 12.375, "learning_rate": 2.5415255960454944e-06, "loss": 1.5499, "step": 76920 }, { "epoch": 2.2381243273118656, "grad_norm": 11.75, "learning_rate": 2.5395863143728795e-06, "loss": 1.5124, "step": 76940 }, { "epoch": 2.2387061116444134, "grad_norm": 13.875, "learning_rate": 2.5376470327002646e-06, "loss": 1.5421, "step": 76960 }, { "epoch": 2.2392878959769615, "grad_norm": 14.1875, "learning_rate": 2.5357077510276497e-06, "loss": 1.5072, "step": 76980 }, { "epoch": 2.2398696803095093, "grad_norm": 13.0, "learning_rate": 2.5337684693550348e-06, "loss": 1.5009, "step": 77000 }, { "epoch": 2.240451464642057, "grad_norm": 13.1875, "learning_rate": 2.53182918768242e-06, "loss": 1.4685, "step": 77020 }, { "epoch": 2.241033248974605, "grad_norm": 13.0, "learning_rate": 2.529889906009805e-06, "loss": 1.5346, "step": 77040 }, { "epoch": 2.241615033307153, "grad_norm": 11.875, "learning_rate": 2.5279506243371896e-06, "loss": 1.4772, "step": 77060 }, { "epoch": 2.242196817639701, "grad_norm": 14.5625, "learning_rate": 2.5260113426645747e-06, "loss": 1.5247, "step": 77080 }, { "epoch": 2.242778601972249, "grad_norm": 14.5, "learning_rate": 2.52407206099196e-06, "loss": 1.5067, "step": 77100 }, { "epoch": 2.2433603863047966, "grad_norm": 13.25, "learning_rate": 2.522132779319345e-06, "loss": 1.5404, "step": 77120 }, { "epoch": 2.243942170637345, "grad_norm": 11.0625, "learning_rate": 2.52019349764673e-06, "loss": 1.4202, "step": 77140 }, { "epoch": 2.2445239549698925, "grad_norm": 12.5625, "learning_rate": 2.518254215974115e-06, "loss": 1.512, "step": 77160 }, { "epoch": 2.2451057393024407, "grad_norm": 12.25, "learning_rate": 2.5163149343015002e-06, "loss": 1.4833, "step": 77180 }, { "epoch": 2.2456875236349885, "grad_norm": 11.3125, "learning_rate": 2.5143756526288853e-06, "loss": 1.4444, "step": 77200 }, { "epoch": 2.2462693079675367, "grad_norm": 14.9375, "learning_rate": 2.5124363709562704e-06, "loss": 1.4819, "step": 77220 }, { "epoch": 2.2468510923000844, "grad_norm": 14.9375, "learning_rate": 2.5104970892836555e-06, "loss": 1.4552, "step": 77240 }, { "epoch": 2.247432876632632, "grad_norm": 12.625, "learning_rate": 2.5085578076110406e-06, "loss": 1.6139, "step": 77260 }, { "epoch": 2.2480146609651803, "grad_norm": 15.8125, "learning_rate": 2.5066185259384257e-06, "loss": 1.5939, "step": 77280 }, { "epoch": 2.248596445297728, "grad_norm": 10.625, "learning_rate": 2.504679244265811e-06, "loss": 1.4841, "step": 77300 }, { "epoch": 2.2491782296302762, "grad_norm": 13.125, "learning_rate": 2.502739962593196e-06, "loss": 1.5859, "step": 77320 }, { "epoch": 2.249760013962824, "grad_norm": 13.5, "learning_rate": 2.500800680920581e-06, "loss": 1.4813, "step": 77340 }, { "epoch": 2.2503417982953717, "grad_norm": 14.125, "learning_rate": 2.498861399247966e-06, "loss": 1.4882, "step": 77360 }, { "epoch": 2.25092358262792, "grad_norm": 14.1875, "learning_rate": 2.496922117575351e-06, "loss": 1.445, "step": 77380 }, { "epoch": 2.2515053669604677, "grad_norm": 12.625, "learning_rate": 2.4949828359027363e-06, "loss": 1.5974, "step": 77400 }, { "epoch": 2.252087151293016, "grad_norm": 12.8125, "learning_rate": 2.4930435542301214e-06, "loss": 1.562, "step": 77420 }, { "epoch": 2.2526689356255636, "grad_norm": 14.4375, "learning_rate": 2.4911042725575065e-06, "loss": 1.5872, "step": 77440 }, { "epoch": 2.2532507199581113, "grad_norm": 14.4375, "learning_rate": 2.4891649908848916e-06, "loss": 1.5831, "step": 77460 }, { "epoch": 2.2538325042906595, "grad_norm": 11.75, "learning_rate": 2.4872257092122763e-06, "loss": 1.5468, "step": 77480 }, { "epoch": 2.2544142886232073, "grad_norm": 12.875, "learning_rate": 2.4852864275396614e-06, "loss": 1.4803, "step": 77500 }, { "epoch": 2.2549960729557554, "grad_norm": 12.375, "learning_rate": 2.4833471458670465e-06, "loss": 1.4564, "step": 77520 }, { "epoch": 2.255577857288303, "grad_norm": 13.0625, "learning_rate": 2.4814078641944316e-06, "loss": 1.5319, "step": 77540 }, { "epoch": 2.256159641620851, "grad_norm": 16.375, "learning_rate": 2.4794685825218167e-06, "loss": 1.5106, "step": 77560 }, { "epoch": 2.256741425953399, "grad_norm": 13.8125, "learning_rate": 2.4775293008492018e-06, "loss": 1.5285, "step": 77580 }, { "epoch": 2.257323210285947, "grad_norm": 12.375, "learning_rate": 2.475590019176587e-06, "loss": 1.47, "step": 77600 }, { "epoch": 2.257904994618495, "grad_norm": 13.6875, "learning_rate": 2.473650737503972e-06, "loss": 1.4906, "step": 77620 }, { "epoch": 2.2584867789510428, "grad_norm": 15.375, "learning_rate": 2.471711455831357e-06, "loss": 1.5675, "step": 77640 }, { "epoch": 2.259068563283591, "grad_norm": 13.5, "learning_rate": 2.469772174158742e-06, "loss": 1.3821, "step": 77660 }, { "epoch": 2.2596503476161387, "grad_norm": 13.625, "learning_rate": 2.4678328924861272e-06, "loss": 1.5581, "step": 77680 }, { "epoch": 2.2602321319486864, "grad_norm": 12.0, "learning_rate": 2.4658936108135123e-06, "loss": 1.4272, "step": 77700 }, { "epoch": 2.2608139162812346, "grad_norm": 13.0, "learning_rate": 2.4639543291408974e-06, "loss": 1.4901, "step": 77720 }, { "epoch": 2.2613957006137824, "grad_norm": 10.625, "learning_rate": 2.4620150474682825e-06, "loss": 1.4473, "step": 77740 }, { "epoch": 2.2619774849463306, "grad_norm": 12.875, "learning_rate": 2.4600757657956676e-06, "loss": 1.5642, "step": 77760 }, { "epoch": 2.2625592692788783, "grad_norm": 13.9375, "learning_rate": 2.4581364841230527e-06, "loss": 1.4057, "step": 77780 }, { "epoch": 2.2631410536114265, "grad_norm": 11.6875, "learning_rate": 2.456197202450438e-06, "loss": 1.5791, "step": 77800 }, { "epoch": 2.263722837943974, "grad_norm": 12.3125, "learning_rate": 2.454257920777823e-06, "loss": 1.4757, "step": 77820 }, { "epoch": 2.264304622276522, "grad_norm": 13.5, "learning_rate": 2.452318639105208e-06, "loss": 1.4728, "step": 77840 }, { "epoch": 2.26488640660907, "grad_norm": 12.625, "learning_rate": 2.450379357432593e-06, "loss": 1.5004, "step": 77860 }, { "epoch": 2.265468190941618, "grad_norm": 13.0, "learning_rate": 2.448440075759978e-06, "loss": 1.4871, "step": 77880 }, { "epoch": 2.266049975274166, "grad_norm": 15.1875, "learning_rate": 2.446500794087363e-06, "loss": 1.5657, "step": 77900 }, { "epoch": 2.266631759606714, "grad_norm": 11.9375, "learning_rate": 2.444561512414748e-06, "loss": 1.553, "step": 77920 }, { "epoch": 2.2672135439392616, "grad_norm": 12.4375, "learning_rate": 2.442622230742133e-06, "loss": 1.4405, "step": 77940 }, { "epoch": 2.2677953282718097, "grad_norm": 14.3125, "learning_rate": 2.440682949069518e-06, "loss": 1.5006, "step": 77960 }, { "epoch": 2.2683771126043575, "grad_norm": 12.5, "learning_rate": 2.4387436673969033e-06, "loss": 1.4933, "step": 77980 }, { "epoch": 2.2689588969369057, "grad_norm": 13.75, "learning_rate": 2.4368043857242884e-06, "loss": 1.452, "step": 78000 }, { "epoch": 2.2695406812694534, "grad_norm": 14.75, "learning_rate": 2.4348651040516735e-06, "loss": 1.6161, "step": 78020 }, { "epoch": 2.270122465602001, "grad_norm": 11.8125, "learning_rate": 2.4329258223790586e-06, "loss": 1.5615, "step": 78040 }, { "epoch": 2.2707042499345493, "grad_norm": 13.0, "learning_rate": 2.4309865407064437e-06, "loss": 1.468, "step": 78060 }, { "epoch": 2.271286034267097, "grad_norm": 10.1875, "learning_rate": 2.4290472590338288e-06, "loss": 1.5075, "step": 78080 }, { "epoch": 2.2718678185996453, "grad_norm": 17.5, "learning_rate": 2.427107977361214e-06, "loss": 1.5169, "step": 78100 }, { "epoch": 2.272449602932193, "grad_norm": 15.125, "learning_rate": 2.425168695688599e-06, "loss": 1.5831, "step": 78120 }, { "epoch": 2.2730313872647407, "grad_norm": 13.6875, "learning_rate": 2.423229414015984e-06, "loss": 1.4668, "step": 78140 }, { "epoch": 2.273613171597289, "grad_norm": 13.0, "learning_rate": 2.421290132343369e-06, "loss": 1.5313, "step": 78160 }, { "epoch": 2.2741949559298367, "grad_norm": 12.4375, "learning_rate": 2.4193508506707543e-06, "loss": 1.5282, "step": 78180 }, { "epoch": 2.274776740262385, "grad_norm": 13.6875, "learning_rate": 2.4174115689981394e-06, "loss": 1.5503, "step": 78200 }, { "epoch": 2.2753585245949326, "grad_norm": 16.0, "learning_rate": 2.4154722873255245e-06, "loss": 1.5384, "step": 78220 }, { "epoch": 2.2759403089274803, "grad_norm": 15.75, "learning_rate": 2.4135330056529095e-06, "loss": 1.4713, "step": 78240 }, { "epoch": 2.2765220932600285, "grad_norm": 13.3125, "learning_rate": 2.4115937239802946e-06, "loss": 1.4791, "step": 78260 }, { "epoch": 2.2771038775925763, "grad_norm": 14.75, "learning_rate": 2.4096544423076797e-06, "loss": 1.5024, "step": 78280 }, { "epoch": 2.2776856619251244, "grad_norm": 14.75, "learning_rate": 2.4077151606350644e-06, "loss": 1.4897, "step": 78300 }, { "epoch": 2.278267446257672, "grad_norm": 14.9375, "learning_rate": 2.4057758789624495e-06, "loss": 1.4642, "step": 78320 }, { "epoch": 2.2788492305902204, "grad_norm": 13.5, "learning_rate": 2.4038365972898346e-06, "loss": 1.4546, "step": 78340 }, { "epoch": 2.279431014922768, "grad_norm": 15.75, "learning_rate": 2.4018973156172197e-06, "loss": 1.544, "step": 78360 }, { "epoch": 2.2800127992553163, "grad_norm": 16.0, "learning_rate": 2.399958033944605e-06, "loss": 1.477, "step": 78380 }, { "epoch": 2.280594583587864, "grad_norm": 14.6875, "learning_rate": 2.39801875227199e-06, "loss": 1.5448, "step": 78400 }, { "epoch": 2.281176367920412, "grad_norm": 14.5, "learning_rate": 2.396079470599375e-06, "loss": 1.4392, "step": 78420 }, { "epoch": 2.28175815225296, "grad_norm": 15.125, "learning_rate": 2.3941401889267597e-06, "loss": 1.5462, "step": 78440 }, { "epoch": 2.2823399365855077, "grad_norm": 16.375, "learning_rate": 2.3922009072541448e-06, "loss": 1.5173, "step": 78460 }, { "epoch": 2.282921720918056, "grad_norm": 13.5, "learning_rate": 2.39026162558153e-06, "loss": 1.4916, "step": 78480 }, { "epoch": 2.2835035052506036, "grad_norm": 12.875, "learning_rate": 2.388322343908915e-06, "loss": 1.4347, "step": 78500 }, { "epoch": 2.2840852895831514, "grad_norm": 11.3125, "learning_rate": 2.3863830622363e-06, "loss": 1.5101, "step": 78520 }, { "epoch": 2.2846670739156996, "grad_norm": 15.0625, "learning_rate": 2.384443780563685e-06, "loss": 1.4632, "step": 78540 }, { "epoch": 2.2852488582482473, "grad_norm": 14.875, "learning_rate": 2.3825044988910703e-06, "loss": 1.5455, "step": 78560 }, { "epoch": 2.2858306425807955, "grad_norm": 14.125, "learning_rate": 2.3805652172184554e-06, "loss": 1.4849, "step": 78580 }, { "epoch": 2.2864124269133432, "grad_norm": 12.5625, "learning_rate": 2.3786259355458405e-06, "loss": 1.5309, "step": 78600 }, { "epoch": 2.286994211245891, "grad_norm": 11.75, "learning_rate": 2.3766866538732256e-06, "loss": 1.5735, "step": 78620 }, { "epoch": 2.287575995578439, "grad_norm": 14.5, "learning_rate": 2.3747473722006107e-06, "loss": 1.5037, "step": 78640 }, { "epoch": 2.288157779910987, "grad_norm": 13.0, "learning_rate": 2.3728080905279957e-06, "loss": 1.4424, "step": 78660 }, { "epoch": 2.288739564243535, "grad_norm": 14.25, "learning_rate": 2.370868808855381e-06, "loss": 1.4635, "step": 78680 }, { "epoch": 2.289321348576083, "grad_norm": 13.125, "learning_rate": 2.368929527182766e-06, "loss": 1.4897, "step": 78700 }, { "epoch": 2.2899031329086306, "grad_norm": 14.3125, "learning_rate": 2.366990245510151e-06, "loss": 1.4852, "step": 78720 }, { "epoch": 2.2904849172411788, "grad_norm": 13.875, "learning_rate": 2.365050963837536e-06, "loss": 1.5879, "step": 78740 }, { "epoch": 2.2910667015737265, "grad_norm": 12.375, "learning_rate": 2.3631116821649212e-06, "loss": 1.5611, "step": 78760 }, { "epoch": 2.2916484859062747, "grad_norm": 13.75, "learning_rate": 2.3611724004923063e-06, "loss": 1.5068, "step": 78780 }, { "epoch": 2.2922302702388224, "grad_norm": 15.0, "learning_rate": 2.3592331188196914e-06, "loss": 1.4878, "step": 78800 }, { "epoch": 2.29281205457137, "grad_norm": 17.5, "learning_rate": 2.3572938371470765e-06, "loss": 1.3869, "step": 78820 }, { "epoch": 2.2933938389039183, "grad_norm": 15.25, "learning_rate": 2.355354555474461e-06, "loss": 1.5822, "step": 78840 }, { "epoch": 2.293975623236466, "grad_norm": 12.25, "learning_rate": 2.3534152738018463e-06, "loss": 1.5676, "step": 78860 }, { "epoch": 2.2945574075690143, "grad_norm": 10.375, "learning_rate": 2.3514759921292314e-06, "loss": 1.4993, "step": 78880 }, { "epoch": 2.295139191901562, "grad_norm": 13.25, "learning_rate": 2.3495367104566165e-06, "loss": 1.4298, "step": 78900 }, { "epoch": 2.29572097623411, "grad_norm": 12.4375, "learning_rate": 2.3475974287840016e-06, "loss": 1.5453, "step": 78920 }, { "epoch": 2.296302760566658, "grad_norm": 15.3125, "learning_rate": 2.3456581471113867e-06, "loss": 1.4572, "step": 78940 }, { "epoch": 2.2968845448992057, "grad_norm": 16.5, "learning_rate": 2.3437188654387718e-06, "loss": 1.4612, "step": 78960 }, { "epoch": 2.297466329231754, "grad_norm": 13.375, "learning_rate": 2.341779583766157e-06, "loss": 1.584, "step": 78980 }, { "epoch": 2.2980481135643016, "grad_norm": 14.125, "learning_rate": 2.339840302093542e-06, "loss": 1.4853, "step": 79000 }, { "epoch": 2.29862989789685, "grad_norm": 9.25, "learning_rate": 2.337901020420927e-06, "loss": 1.4815, "step": 79020 }, { "epoch": 2.2992116822293975, "grad_norm": 12.25, "learning_rate": 2.335961738748312e-06, "loss": 1.4859, "step": 79040 }, { "epoch": 2.2997934665619457, "grad_norm": 14.0625, "learning_rate": 2.3340224570756973e-06, "loss": 1.5659, "step": 79060 }, { "epoch": 2.3003752508944935, "grad_norm": 13.6875, "learning_rate": 2.3320831754030824e-06, "loss": 1.498, "step": 79080 }, { "epoch": 2.300957035227041, "grad_norm": 12.8125, "learning_rate": 2.3301438937304675e-06, "loss": 1.4827, "step": 79100 }, { "epoch": 2.3015388195595894, "grad_norm": 12.0625, "learning_rate": 2.3282046120578526e-06, "loss": 1.4264, "step": 79120 }, { "epoch": 2.302120603892137, "grad_norm": 15.3125, "learning_rate": 2.3262653303852377e-06, "loss": 1.4727, "step": 79140 }, { "epoch": 2.3027023882246853, "grad_norm": 14.25, "learning_rate": 2.3243260487126228e-06, "loss": 1.4976, "step": 79160 }, { "epoch": 2.303284172557233, "grad_norm": 14.125, "learning_rate": 2.322386767040008e-06, "loss": 1.4303, "step": 79180 }, { "epoch": 2.303865956889781, "grad_norm": 17.875, "learning_rate": 2.320447485367393e-06, "loss": 1.4618, "step": 79200 }, { "epoch": 2.304447741222329, "grad_norm": 14.0625, "learning_rate": 2.318508203694778e-06, "loss": 1.463, "step": 79220 }, { "epoch": 2.3050295255548767, "grad_norm": 12.6875, "learning_rate": 2.3165689220221627e-06, "loss": 1.4862, "step": 79240 }, { "epoch": 2.305611309887425, "grad_norm": 14.5625, "learning_rate": 2.314629640349548e-06, "loss": 1.4419, "step": 79260 }, { "epoch": 2.3061930942199727, "grad_norm": 11.75, "learning_rate": 2.312690358676933e-06, "loss": 1.4422, "step": 79280 }, { "epoch": 2.3067748785525204, "grad_norm": 14.5, "learning_rate": 2.310751077004318e-06, "loss": 1.5241, "step": 79300 }, { "epoch": 2.3073566628850686, "grad_norm": 13.25, "learning_rate": 2.308811795331703e-06, "loss": 1.4881, "step": 79320 }, { "epoch": 2.3079384472176163, "grad_norm": 13.25, "learning_rate": 2.3068725136590882e-06, "loss": 1.5045, "step": 79340 }, { "epoch": 2.3085202315501645, "grad_norm": 13.8125, "learning_rate": 2.3049332319864733e-06, "loss": 1.4625, "step": 79360 }, { "epoch": 2.3091020158827122, "grad_norm": 13.75, "learning_rate": 2.3029939503138584e-06, "loss": 1.5605, "step": 79380 }, { "epoch": 2.30968380021526, "grad_norm": 13.625, "learning_rate": 2.3010546686412435e-06, "loss": 1.4955, "step": 79400 }, { "epoch": 2.310265584547808, "grad_norm": 13.4375, "learning_rate": 2.2991153869686286e-06, "loss": 1.5451, "step": 79420 }, { "epoch": 2.310847368880356, "grad_norm": 15.5, "learning_rate": 2.2971761052960137e-06, "loss": 1.4791, "step": 79440 }, { "epoch": 2.311429153212904, "grad_norm": 13.1875, "learning_rate": 2.295236823623399e-06, "loss": 1.4711, "step": 79460 }, { "epoch": 2.312010937545452, "grad_norm": 14.375, "learning_rate": 2.2932975419507835e-06, "loss": 1.5716, "step": 79480 }, { "epoch": 2.3125927218779996, "grad_norm": 13.5, "learning_rate": 2.2913582602781686e-06, "loss": 1.5021, "step": 79500 }, { "epoch": 2.3131745062105478, "grad_norm": 15.0625, "learning_rate": 2.2894189786055537e-06, "loss": 1.4842, "step": 79520 }, { "epoch": 2.3137562905430955, "grad_norm": 14.25, "learning_rate": 2.2874796969329388e-06, "loss": 1.4506, "step": 79540 }, { "epoch": 2.3143380748756437, "grad_norm": 11.875, "learning_rate": 2.285540415260324e-06, "loss": 1.5117, "step": 79560 }, { "epoch": 2.3149198592081914, "grad_norm": 11.8125, "learning_rate": 2.283601133587709e-06, "loss": 1.5294, "step": 79580 }, { "epoch": 2.3155016435407396, "grad_norm": 12.9375, "learning_rate": 2.281661851915094e-06, "loss": 1.4868, "step": 79600 }, { "epoch": 2.3160834278732874, "grad_norm": 12.0625, "learning_rate": 2.279722570242479e-06, "loss": 1.5197, "step": 79620 }, { "epoch": 2.3166652122058355, "grad_norm": 15.0, "learning_rate": 2.2777832885698643e-06, "loss": 1.5152, "step": 79640 }, { "epoch": 2.3172469965383833, "grad_norm": 15.8125, "learning_rate": 2.2758440068972494e-06, "loss": 1.5211, "step": 79660 }, { "epoch": 2.317828780870931, "grad_norm": 12.625, "learning_rate": 2.2739047252246344e-06, "loss": 1.4938, "step": 79680 }, { "epoch": 2.318410565203479, "grad_norm": 13.0, "learning_rate": 2.2719654435520195e-06, "loss": 1.5331, "step": 79700 }, { "epoch": 2.318992349536027, "grad_norm": 12.8125, "learning_rate": 2.2700261618794046e-06, "loss": 1.4789, "step": 79720 }, { "epoch": 2.319574133868575, "grad_norm": 16.875, "learning_rate": 2.2680868802067897e-06, "loss": 1.4278, "step": 79740 }, { "epoch": 2.320155918201123, "grad_norm": 14.5625, "learning_rate": 2.266147598534175e-06, "loss": 1.4458, "step": 79760 }, { "epoch": 2.3207377025336706, "grad_norm": 13.8125, "learning_rate": 2.26420831686156e-06, "loss": 1.4504, "step": 79780 }, { "epoch": 2.321319486866219, "grad_norm": 12.875, "learning_rate": 2.2622690351889446e-06, "loss": 1.4409, "step": 79800 }, { "epoch": 2.3219012711987665, "grad_norm": 11.75, "learning_rate": 2.2603297535163297e-06, "loss": 1.51, "step": 79820 }, { "epoch": 2.3224830555313147, "grad_norm": 15.625, "learning_rate": 2.258390471843715e-06, "loss": 1.4906, "step": 79840 }, { "epoch": 2.3230648398638625, "grad_norm": 12.5625, "learning_rate": 2.2564511901711e-06, "loss": 1.526, "step": 79860 }, { "epoch": 2.32364662419641, "grad_norm": 13.875, "learning_rate": 2.254511908498485e-06, "loss": 1.4035, "step": 79880 }, { "epoch": 2.3242284085289584, "grad_norm": 15.6875, "learning_rate": 2.25257262682587e-06, "loss": 1.4974, "step": 79900 }, { "epoch": 2.324810192861506, "grad_norm": 13.125, "learning_rate": 2.250633345153255e-06, "loss": 1.4432, "step": 79920 }, { "epoch": 2.3253919771940543, "grad_norm": 11.875, "learning_rate": 2.2486940634806403e-06, "loss": 1.4812, "step": 79940 }, { "epoch": 2.325973761526602, "grad_norm": 12.4375, "learning_rate": 2.2467547818080254e-06, "loss": 1.5583, "step": 79960 }, { "epoch": 2.32655554585915, "grad_norm": 12.25, "learning_rate": 2.2448155001354105e-06, "loss": 1.4932, "step": 79980 }, { "epoch": 2.327137330191698, "grad_norm": 13.4375, "learning_rate": 2.2428762184627956e-06, "loss": 1.5036, "step": 80000 }, { "epoch": 2.3277191145242457, "grad_norm": 13.9375, "learning_rate": 2.2409369367901807e-06, "loss": 1.5853, "step": 80020 }, { "epoch": 2.328300898856794, "grad_norm": 15.0, "learning_rate": 2.2389976551175658e-06, "loss": 1.492, "step": 80040 }, { "epoch": 2.3288826831893417, "grad_norm": 18.0, "learning_rate": 2.237058373444951e-06, "loss": 1.4796, "step": 80060 }, { "epoch": 2.3294644675218894, "grad_norm": 12.25, "learning_rate": 2.235119091772336e-06, "loss": 1.5104, "step": 80080 }, { "epoch": 2.3300462518544376, "grad_norm": 13.5, "learning_rate": 2.233179810099721e-06, "loss": 1.5557, "step": 80100 }, { "epoch": 2.3306280361869853, "grad_norm": 16.375, "learning_rate": 2.231240528427106e-06, "loss": 1.5181, "step": 80120 }, { "epoch": 2.3312098205195335, "grad_norm": 14.875, "learning_rate": 2.2293012467544913e-06, "loss": 1.5075, "step": 80140 }, { "epoch": 2.3317916048520813, "grad_norm": 11.0, "learning_rate": 2.2273619650818764e-06, "loss": 1.5286, "step": 80160 }, { "epoch": 2.3323733891846294, "grad_norm": 12.1875, "learning_rate": 2.2254226834092615e-06, "loss": 1.4977, "step": 80180 }, { "epoch": 2.332955173517177, "grad_norm": 12.0, "learning_rate": 2.223483401736646e-06, "loss": 1.5509, "step": 80200 }, { "epoch": 2.333536957849725, "grad_norm": 14.6875, "learning_rate": 2.2215441200640312e-06, "loss": 1.5433, "step": 80220 }, { "epoch": 2.334118742182273, "grad_norm": 13.0625, "learning_rate": 2.2196048383914163e-06, "loss": 1.4002, "step": 80240 }, { "epoch": 2.334700526514821, "grad_norm": 13.0, "learning_rate": 2.2176655567188014e-06, "loss": 1.4283, "step": 80260 }, { "epoch": 2.335282310847369, "grad_norm": 12.5625, "learning_rate": 2.2157262750461865e-06, "loss": 1.619, "step": 80280 }, { "epoch": 2.3358640951799168, "grad_norm": 14.3125, "learning_rate": 2.2137869933735716e-06, "loss": 1.5174, "step": 80300 }, { "epoch": 2.336445879512465, "grad_norm": 14.625, "learning_rate": 2.2118477117009567e-06, "loss": 1.4691, "step": 80320 }, { "epoch": 2.3370276638450127, "grad_norm": 14.3125, "learning_rate": 2.209908430028342e-06, "loss": 1.5607, "step": 80340 }, { "epoch": 2.3376094481775604, "grad_norm": 13.5, "learning_rate": 2.207969148355727e-06, "loss": 1.4788, "step": 80360 }, { "epoch": 2.3381912325101086, "grad_norm": 12.875, "learning_rate": 2.206029866683112e-06, "loss": 1.5194, "step": 80380 }, { "epoch": 2.3387730168426564, "grad_norm": 9.4375, "learning_rate": 2.204090585010497e-06, "loss": 1.5032, "step": 80400 }, { "epoch": 2.3393548011752046, "grad_norm": 14.9375, "learning_rate": 2.202151303337882e-06, "loss": 1.4447, "step": 80420 }, { "epoch": 2.3399365855077523, "grad_norm": 17.5, "learning_rate": 2.2002120216652673e-06, "loss": 1.4967, "step": 80440 }, { "epoch": 2.3405183698403, "grad_norm": 12.5, "learning_rate": 2.1982727399926524e-06, "loss": 1.4616, "step": 80460 }, { "epoch": 2.3411001541728482, "grad_norm": 11.6875, "learning_rate": 2.1963334583200375e-06, "loss": 1.4305, "step": 80480 }, { "epoch": 2.341681938505396, "grad_norm": 14.6875, "learning_rate": 2.1943941766474226e-06, "loss": 1.5134, "step": 80500 }, { "epoch": 2.342263722837944, "grad_norm": 12.125, "learning_rate": 2.1924548949748077e-06, "loss": 1.4399, "step": 80520 }, { "epoch": 2.342845507170492, "grad_norm": 8.5625, "learning_rate": 2.1905156133021924e-06, "loss": 1.4176, "step": 80540 }, { "epoch": 2.3434272915030396, "grad_norm": 13.3125, "learning_rate": 2.1885763316295775e-06, "loss": 1.5658, "step": 80560 }, { "epoch": 2.344009075835588, "grad_norm": 11.3125, "learning_rate": 2.1866370499569626e-06, "loss": 1.6313, "step": 80580 }, { "epoch": 2.3445908601681356, "grad_norm": 15.1875, "learning_rate": 2.1846977682843477e-06, "loss": 1.5179, "step": 80600 }, { "epoch": 2.3451726445006837, "grad_norm": 13.0625, "learning_rate": 2.1827584866117328e-06, "loss": 1.501, "step": 80620 }, { "epoch": 2.3457544288332315, "grad_norm": 13.0, "learning_rate": 2.180819204939118e-06, "loss": 1.5677, "step": 80640 }, { "epoch": 2.3463362131657792, "grad_norm": 14.8125, "learning_rate": 2.178879923266503e-06, "loss": 1.4799, "step": 80660 }, { "epoch": 2.3469179974983274, "grad_norm": 13.6875, "learning_rate": 2.176940641593888e-06, "loss": 1.437, "step": 80680 }, { "epoch": 2.347499781830875, "grad_norm": 13.5, "learning_rate": 2.175001359921273e-06, "loss": 1.5217, "step": 80700 }, { "epoch": 2.3480815661634233, "grad_norm": 13.125, "learning_rate": 2.1730620782486582e-06, "loss": 1.5573, "step": 80720 }, { "epoch": 2.348663350495971, "grad_norm": 14.6875, "learning_rate": 2.1711227965760433e-06, "loss": 1.4689, "step": 80740 }, { "epoch": 2.349245134828519, "grad_norm": 14.8125, "learning_rate": 2.169183514903428e-06, "loss": 1.5098, "step": 80760 }, { "epoch": 2.349826919161067, "grad_norm": 15.0, "learning_rate": 2.167244233230813e-06, "loss": 1.5224, "step": 80780 }, { "epoch": 2.3504087034936147, "grad_norm": 13.25, "learning_rate": 2.1653049515581982e-06, "loss": 1.5475, "step": 80800 }, { "epoch": 2.350990487826163, "grad_norm": 14.5, "learning_rate": 2.1633656698855833e-06, "loss": 1.5486, "step": 80820 }, { "epoch": 2.3515722721587107, "grad_norm": 15.6875, "learning_rate": 2.1614263882129684e-06, "loss": 1.4639, "step": 80840 }, { "epoch": 2.352154056491259, "grad_norm": 13.3125, "learning_rate": 2.1594871065403535e-06, "loss": 1.4772, "step": 80860 }, { "epoch": 2.3527358408238066, "grad_norm": 11.875, "learning_rate": 2.1575478248677386e-06, "loss": 1.5233, "step": 80880 }, { "epoch": 2.353317625156355, "grad_norm": 14.0625, "learning_rate": 2.1556085431951237e-06, "loss": 1.4569, "step": 80900 }, { "epoch": 2.3538994094889025, "grad_norm": 15.125, "learning_rate": 2.153669261522509e-06, "loss": 1.5201, "step": 80920 }, { "epoch": 2.3544811938214503, "grad_norm": 13.9375, "learning_rate": 2.151729979849894e-06, "loss": 1.5271, "step": 80940 }, { "epoch": 2.3550629781539985, "grad_norm": 14.3125, "learning_rate": 2.149790698177279e-06, "loss": 1.525, "step": 80960 }, { "epoch": 2.355644762486546, "grad_norm": 13.5625, "learning_rate": 2.147851416504664e-06, "loss": 1.5975, "step": 80980 }, { "epoch": 2.3562265468190944, "grad_norm": 14.1875, "learning_rate": 2.145912134832049e-06, "loss": 1.4742, "step": 81000 }, { "epoch": 2.356808331151642, "grad_norm": 12.8125, "learning_rate": 2.1439728531594343e-06, "loss": 1.4587, "step": 81020 }, { "epoch": 2.35739011548419, "grad_norm": 11.5, "learning_rate": 2.1420335714868194e-06, "loss": 1.47, "step": 81040 }, { "epoch": 2.357971899816738, "grad_norm": 13.75, "learning_rate": 2.1400942898142045e-06, "loss": 1.4474, "step": 81060 }, { "epoch": 2.358553684149286, "grad_norm": 14.9375, "learning_rate": 2.1381550081415896e-06, "loss": 1.5069, "step": 81080 }, { "epoch": 2.359135468481834, "grad_norm": 14.0625, "learning_rate": 2.1362157264689747e-06, "loss": 1.5437, "step": 81100 }, { "epoch": 2.3597172528143817, "grad_norm": 15.875, "learning_rate": 2.1342764447963598e-06, "loss": 1.4676, "step": 81120 }, { "epoch": 2.3602990371469295, "grad_norm": 13.4375, "learning_rate": 2.132337163123745e-06, "loss": 1.5713, "step": 81140 }, { "epoch": 2.3608808214794776, "grad_norm": 13.3125, "learning_rate": 2.1303978814511295e-06, "loss": 1.513, "step": 81160 }, { "epoch": 2.3614626058120254, "grad_norm": 12.5, "learning_rate": 2.1284585997785146e-06, "loss": 1.589, "step": 81180 }, { "epoch": 2.3620443901445736, "grad_norm": 10.25, "learning_rate": 2.1265193181058997e-06, "loss": 1.5326, "step": 81200 }, { "epoch": 2.3626261744771213, "grad_norm": 16.25, "learning_rate": 2.124580036433285e-06, "loss": 1.5033, "step": 81220 }, { "epoch": 2.363207958809669, "grad_norm": 11.75, "learning_rate": 2.12264075476067e-06, "loss": 1.5411, "step": 81240 }, { "epoch": 2.3637897431422172, "grad_norm": 13.1875, "learning_rate": 2.120701473088055e-06, "loss": 1.5313, "step": 81260 }, { "epoch": 2.364371527474765, "grad_norm": 12.75, "learning_rate": 2.11876219141544e-06, "loss": 1.4944, "step": 81280 }, { "epoch": 2.364953311807313, "grad_norm": 15.9375, "learning_rate": 2.1168229097428252e-06, "loss": 1.4613, "step": 81300 }, { "epoch": 2.365535096139861, "grad_norm": 13.5, "learning_rate": 2.1148836280702103e-06, "loss": 1.5501, "step": 81320 }, { "epoch": 2.3661168804724086, "grad_norm": 12.75, "learning_rate": 2.1129443463975954e-06, "loss": 1.546, "step": 81340 }, { "epoch": 2.366698664804957, "grad_norm": 12.875, "learning_rate": 2.1110050647249805e-06, "loss": 1.4815, "step": 81360 }, { "epoch": 2.3672804491375046, "grad_norm": 12.6875, "learning_rate": 2.1090657830523656e-06, "loss": 1.4589, "step": 81380 }, { "epoch": 2.3678622334700528, "grad_norm": 11.4375, "learning_rate": 2.1071265013797507e-06, "loss": 1.5355, "step": 81400 }, { "epoch": 2.3684440178026005, "grad_norm": 13.125, "learning_rate": 2.105187219707136e-06, "loss": 1.5244, "step": 81420 }, { "epoch": 2.3690258021351487, "grad_norm": 12.1875, "learning_rate": 2.103247938034521e-06, "loss": 1.4817, "step": 81440 }, { "epoch": 2.3696075864676964, "grad_norm": 13.375, "learning_rate": 2.101308656361906e-06, "loss": 1.4578, "step": 81460 }, { "epoch": 2.370189370800244, "grad_norm": 12.8125, "learning_rate": 2.099369374689291e-06, "loss": 1.4558, "step": 81480 }, { "epoch": 2.3707711551327924, "grad_norm": 13.0625, "learning_rate": 2.097430093016676e-06, "loss": 1.5164, "step": 81500 }, { "epoch": 2.37135293946534, "grad_norm": 13.0625, "learning_rate": 2.0954908113440613e-06, "loss": 1.5401, "step": 81520 }, { "epoch": 2.3719347237978883, "grad_norm": 14.25, "learning_rate": 2.0935515296714464e-06, "loss": 1.5447, "step": 81540 }, { "epoch": 2.372516508130436, "grad_norm": 13.6875, "learning_rate": 2.091612247998831e-06, "loss": 1.524, "step": 81560 }, { "epoch": 2.373098292462984, "grad_norm": 12.125, "learning_rate": 2.089672966326216e-06, "loss": 1.5211, "step": 81580 }, { "epoch": 2.373680076795532, "grad_norm": 16.625, "learning_rate": 2.0877336846536013e-06, "loss": 1.5303, "step": 81600 }, { "epoch": 2.3742618611280797, "grad_norm": 14.625, "learning_rate": 2.0857944029809864e-06, "loss": 1.5686, "step": 81620 }, { "epoch": 2.374843645460628, "grad_norm": 13.4375, "learning_rate": 2.0838551213083715e-06, "loss": 1.5455, "step": 81640 }, { "epoch": 2.3754254297931756, "grad_norm": 13.0625, "learning_rate": 2.0819158396357566e-06, "loss": 1.4859, "step": 81660 }, { "epoch": 2.376007214125724, "grad_norm": 11.625, "learning_rate": 2.0799765579631417e-06, "loss": 1.4473, "step": 81680 }, { "epoch": 2.3765889984582715, "grad_norm": 14.9375, "learning_rate": 2.0780372762905263e-06, "loss": 1.5468, "step": 81700 }, { "epoch": 2.3771707827908193, "grad_norm": 13.25, "learning_rate": 2.0760979946179114e-06, "loss": 1.4947, "step": 81720 }, { "epoch": 2.3777525671233675, "grad_norm": 14.25, "learning_rate": 2.0741587129452965e-06, "loss": 1.4489, "step": 81740 }, { "epoch": 2.378334351455915, "grad_norm": 14.375, "learning_rate": 2.0722194312726816e-06, "loss": 1.4634, "step": 81760 }, { "epoch": 2.3789161357884634, "grad_norm": 12.875, "learning_rate": 2.0702801496000667e-06, "loss": 1.5208, "step": 81780 }, { "epoch": 2.379497920121011, "grad_norm": 14.5, "learning_rate": 2.068340867927452e-06, "loss": 1.4866, "step": 81800 }, { "epoch": 2.380079704453559, "grad_norm": 14.5, "learning_rate": 2.066401586254837e-06, "loss": 1.4926, "step": 81820 }, { "epoch": 2.380661488786107, "grad_norm": 12.6875, "learning_rate": 2.064462304582222e-06, "loss": 1.5062, "step": 81840 }, { "epoch": 2.381243273118655, "grad_norm": 15.625, "learning_rate": 2.062523022909607e-06, "loss": 1.4599, "step": 81860 }, { "epoch": 2.381825057451203, "grad_norm": 12.0, "learning_rate": 2.060583741236992e-06, "loss": 1.516, "step": 81880 }, { "epoch": 2.3824068417837507, "grad_norm": 13.25, "learning_rate": 2.0586444595643773e-06, "loss": 1.4665, "step": 81900 }, { "epoch": 2.3829886261162985, "grad_norm": 17.375, "learning_rate": 2.0567051778917624e-06, "loss": 1.5056, "step": 81920 }, { "epoch": 2.3835704104488467, "grad_norm": 14.125, "learning_rate": 2.0547658962191475e-06, "loss": 1.4422, "step": 81940 }, { "epoch": 2.3841521947813944, "grad_norm": 13.875, "learning_rate": 2.0528266145465326e-06, "loss": 1.5094, "step": 81960 }, { "epoch": 2.3847339791139426, "grad_norm": 15.5, "learning_rate": 2.0508873328739177e-06, "loss": 1.6147, "step": 81980 }, { "epoch": 2.3853157634464903, "grad_norm": 12.6875, "learning_rate": 2.048948051201303e-06, "loss": 1.4948, "step": 82000 }, { "epoch": 2.385897547779038, "grad_norm": 13.625, "learning_rate": 2.047008769528688e-06, "loss": 1.4844, "step": 82020 }, { "epoch": 2.3864793321115862, "grad_norm": 13.625, "learning_rate": 2.045069487856073e-06, "loss": 1.5263, "step": 82040 }, { "epoch": 2.387061116444134, "grad_norm": 10.125, "learning_rate": 2.043130206183458e-06, "loss": 1.4915, "step": 82060 }, { "epoch": 2.387642900776682, "grad_norm": 12.875, "learning_rate": 2.041190924510843e-06, "loss": 1.4541, "step": 82080 }, { "epoch": 2.38822468510923, "grad_norm": 13.625, "learning_rate": 2.0392516428382283e-06, "loss": 1.4651, "step": 82100 }, { "epoch": 2.388806469441778, "grad_norm": 14.0, "learning_rate": 2.037312361165613e-06, "loss": 1.4914, "step": 82120 }, { "epoch": 2.389388253774326, "grad_norm": 13.5625, "learning_rate": 2.035373079492998e-06, "loss": 1.498, "step": 82140 }, { "epoch": 2.389970038106874, "grad_norm": 13.5, "learning_rate": 2.033433797820383e-06, "loss": 1.4332, "step": 82160 }, { "epoch": 2.3905518224394218, "grad_norm": 11.3125, "learning_rate": 2.0314945161477682e-06, "loss": 1.4921, "step": 82180 }, { "epoch": 2.3911336067719695, "grad_norm": 10.875, "learning_rate": 2.0295552344751533e-06, "loss": 1.4908, "step": 82200 }, { "epoch": 2.3917153911045177, "grad_norm": 15.625, "learning_rate": 2.0276159528025384e-06, "loss": 1.4405, "step": 82220 }, { "epoch": 2.3922971754370654, "grad_norm": 12.875, "learning_rate": 2.0256766711299235e-06, "loss": 1.5628, "step": 82240 }, { "epoch": 2.3928789597696136, "grad_norm": 13.6875, "learning_rate": 2.0237373894573086e-06, "loss": 1.4273, "step": 82260 }, { "epoch": 2.3934607441021614, "grad_norm": 13.375, "learning_rate": 2.0217981077846937e-06, "loss": 1.5593, "step": 82280 }, { "epoch": 2.394042528434709, "grad_norm": 14.375, "learning_rate": 2.019858826112079e-06, "loss": 1.5318, "step": 82300 }, { "epoch": 2.3946243127672573, "grad_norm": 13.5, "learning_rate": 2.017919544439464e-06, "loss": 1.4808, "step": 82320 }, { "epoch": 2.395206097099805, "grad_norm": 14.5625, "learning_rate": 2.015980262766849e-06, "loss": 1.5295, "step": 82340 }, { "epoch": 2.395787881432353, "grad_norm": 12.0, "learning_rate": 2.014040981094234e-06, "loss": 1.6113, "step": 82360 }, { "epoch": 2.396369665764901, "grad_norm": 14.0, "learning_rate": 2.0121016994216192e-06, "loss": 1.5515, "step": 82380 }, { "epoch": 2.3969514500974487, "grad_norm": 13.25, "learning_rate": 2.0101624177490043e-06, "loss": 1.4164, "step": 82400 }, { "epoch": 2.397533234429997, "grad_norm": 14.4375, "learning_rate": 2.0082231360763894e-06, "loss": 1.5213, "step": 82420 }, { "epoch": 2.3981150187625446, "grad_norm": 11.9375, "learning_rate": 2.0062838544037745e-06, "loss": 1.4736, "step": 82440 }, { "epoch": 2.398696803095093, "grad_norm": 15.0, "learning_rate": 2.0043445727311596e-06, "loss": 1.5123, "step": 82460 }, { "epoch": 2.3992785874276406, "grad_norm": 14.1875, "learning_rate": 2.0024052910585447e-06, "loss": 1.399, "step": 82480 }, { "epoch": 2.3998603717601883, "grad_norm": 17.625, "learning_rate": 2.00046600938593e-06, "loss": 1.4898, "step": 82500 }, { "epoch": 2.4004421560927365, "grad_norm": 13.5, "learning_rate": 1.9985267277133145e-06, "loss": 1.4799, "step": 82520 }, { "epoch": 2.401023940425284, "grad_norm": 13.5, "learning_rate": 1.9965874460406996e-06, "loss": 1.5474, "step": 82540 }, { "epoch": 2.4016057247578324, "grad_norm": 14.625, "learning_rate": 1.9946481643680847e-06, "loss": 1.4957, "step": 82560 }, { "epoch": 2.40218750909038, "grad_norm": 12.375, "learning_rate": 1.9927088826954698e-06, "loss": 1.4922, "step": 82580 }, { "epoch": 2.402769293422928, "grad_norm": 16.875, "learning_rate": 1.990769601022855e-06, "loss": 1.4538, "step": 82600 }, { "epoch": 2.403351077755476, "grad_norm": 14.1875, "learning_rate": 1.98883031935024e-06, "loss": 1.474, "step": 82620 }, { "epoch": 2.403932862088024, "grad_norm": 13.875, "learning_rate": 1.986891037677625e-06, "loss": 1.531, "step": 82640 }, { "epoch": 2.404514646420572, "grad_norm": 13.5, "learning_rate": 1.9849517560050097e-06, "loss": 1.5763, "step": 82660 }, { "epoch": 2.4050964307531197, "grad_norm": 12.5625, "learning_rate": 1.983012474332395e-06, "loss": 1.5443, "step": 82680 }, { "epoch": 2.405678215085668, "grad_norm": 11.125, "learning_rate": 1.98107319265978e-06, "loss": 1.5399, "step": 82700 }, { "epoch": 2.4062599994182157, "grad_norm": 14.9375, "learning_rate": 1.979133910987165e-06, "loss": 1.5399, "step": 82720 }, { "epoch": 2.4068417837507634, "grad_norm": 15.625, "learning_rate": 1.97719462931455e-06, "loss": 1.4963, "step": 82740 }, { "epoch": 2.4074235680833116, "grad_norm": 13.9375, "learning_rate": 1.9752553476419352e-06, "loss": 1.6571, "step": 82760 }, { "epoch": 2.4080053524158593, "grad_norm": 13.5, "learning_rate": 1.9733160659693203e-06, "loss": 1.4761, "step": 82780 }, { "epoch": 2.4085871367484075, "grad_norm": 14.5625, "learning_rate": 1.9713767842967054e-06, "loss": 1.5335, "step": 82800 }, { "epoch": 2.4091689210809553, "grad_norm": 17.625, "learning_rate": 1.9694375026240905e-06, "loss": 1.4916, "step": 82820 }, { "epoch": 2.4097507054135034, "grad_norm": 16.75, "learning_rate": 1.9674982209514756e-06, "loss": 1.5589, "step": 82840 }, { "epoch": 2.410332489746051, "grad_norm": 14.1875, "learning_rate": 1.9655589392788607e-06, "loss": 1.4952, "step": 82860 }, { "epoch": 2.410914274078599, "grad_norm": 12.875, "learning_rate": 1.963619657606246e-06, "loss": 1.4848, "step": 82880 }, { "epoch": 2.411496058411147, "grad_norm": 13.625, "learning_rate": 1.961680375933631e-06, "loss": 1.4803, "step": 82900 }, { "epoch": 2.412077842743695, "grad_norm": 15.0, "learning_rate": 1.959741094261016e-06, "loss": 1.5074, "step": 82920 }, { "epoch": 2.412659627076243, "grad_norm": 15.25, "learning_rate": 1.957801812588401e-06, "loss": 1.4036, "step": 82940 }, { "epoch": 2.413241411408791, "grad_norm": 11.4375, "learning_rate": 1.955862530915786e-06, "loss": 1.4796, "step": 82960 }, { "epoch": 2.4138231957413385, "grad_norm": 13.0625, "learning_rate": 1.9539232492431713e-06, "loss": 1.449, "step": 82980 }, { "epoch": 2.4144049800738867, "grad_norm": 13.0625, "learning_rate": 1.9519839675705564e-06, "loss": 1.4359, "step": 83000 }, { "epoch": 2.4149867644064344, "grad_norm": 15.0, "learning_rate": 1.9500446858979415e-06, "loss": 1.4583, "step": 83020 }, { "epoch": 2.4155685487389826, "grad_norm": 16.0, "learning_rate": 1.9481054042253266e-06, "loss": 1.5212, "step": 83040 }, { "epoch": 2.4161503330715304, "grad_norm": 18.125, "learning_rate": 1.9461661225527117e-06, "loss": 1.5268, "step": 83060 }, { "epoch": 2.416732117404078, "grad_norm": 12.375, "learning_rate": 1.9442268408800964e-06, "loss": 1.5542, "step": 83080 }, { "epoch": 2.4173139017366263, "grad_norm": 16.5, "learning_rate": 1.9422875592074815e-06, "loss": 1.3943, "step": 83100 }, { "epoch": 2.417895686069174, "grad_norm": 12.125, "learning_rate": 1.9403482775348666e-06, "loss": 1.4659, "step": 83120 }, { "epoch": 2.4184774704017222, "grad_norm": 15.0, "learning_rate": 1.9384089958622517e-06, "loss": 1.4778, "step": 83140 }, { "epoch": 2.41905925473427, "grad_norm": 14.125, "learning_rate": 1.9364697141896368e-06, "loss": 1.4649, "step": 83160 }, { "epoch": 2.4196410390668177, "grad_norm": 14.375, "learning_rate": 1.934530432517022e-06, "loss": 1.5323, "step": 83180 }, { "epoch": 2.420222823399366, "grad_norm": 18.625, "learning_rate": 1.932591150844407e-06, "loss": 1.5303, "step": 83200 }, { "epoch": 2.4208046077319136, "grad_norm": 11.4375, "learning_rate": 1.930651869171792e-06, "loss": 1.4254, "step": 83220 }, { "epoch": 2.421386392064462, "grad_norm": 14.5, "learning_rate": 1.928712587499177e-06, "loss": 1.4953, "step": 83240 }, { "epoch": 2.4219681763970096, "grad_norm": 14.375, "learning_rate": 1.9267733058265622e-06, "loss": 1.4438, "step": 83260 }, { "epoch": 2.4225499607295573, "grad_norm": 13.1875, "learning_rate": 1.9248340241539473e-06, "loss": 1.4613, "step": 83280 }, { "epoch": 2.4231317450621055, "grad_norm": 12.0, "learning_rate": 1.9228947424813324e-06, "loss": 1.5023, "step": 83300 }, { "epoch": 2.4237135293946532, "grad_norm": 12.6875, "learning_rate": 1.9209554608087175e-06, "loss": 1.5418, "step": 83320 }, { "epoch": 2.4242953137272014, "grad_norm": 12.9375, "learning_rate": 1.9190161791361026e-06, "loss": 1.497, "step": 83340 }, { "epoch": 2.424877098059749, "grad_norm": 10.0, "learning_rate": 1.9170768974634877e-06, "loss": 1.559, "step": 83360 }, { "epoch": 2.4254588823922973, "grad_norm": 13.125, "learning_rate": 1.915137615790873e-06, "loss": 1.5427, "step": 83380 }, { "epoch": 2.426040666724845, "grad_norm": 14.9375, "learning_rate": 1.913198334118258e-06, "loss": 1.4278, "step": 83400 }, { "epoch": 2.4266224510573933, "grad_norm": 12.5625, "learning_rate": 1.911259052445643e-06, "loss": 1.4889, "step": 83420 }, { "epoch": 2.427204235389941, "grad_norm": 12.625, "learning_rate": 1.909319770773028e-06, "loss": 1.4222, "step": 83440 }, { "epoch": 2.4277860197224888, "grad_norm": 15.0, "learning_rate": 1.9073804891004132e-06, "loss": 1.5064, "step": 83460 }, { "epoch": 2.428367804055037, "grad_norm": 14.8125, "learning_rate": 1.905441207427798e-06, "loss": 1.5303, "step": 83480 }, { "epoch": 2.4289495883875847, "grad_norm": 14.875, "learning_rate": 1.9035019257551832e-06, "loss": 1.4656, "step": 83500 }, { "epoch": 2.429531372720133, "grad_norm": 15.125, "learning_rate": 1.9015626440825683e-06, "loss": 1.5907, "step": 83520 }, { "epoch": 2.4301131570526806, "grad_norm": 12.8125, "learning_rate": 1.8996233624099534e-06, "loss": 1.4374, "step": 83540 }, { "epoch": 2.4306949413852283, "grad_norm": 14.8125, "learning_rate": 1.8976840807373385e-06, "loss": 1.5289, "step": 83560 }, { "epoch": 2.4312767257177765, "grad_norm": 14.4375, "learning_rate": 1.8957447990647234e-06, "loss": 1.5548, "step": 83580 }, { "epoch": 2.4318585100503243, "grad_norm": 13.5, "learning_rate": 1.8938055173921085e-06, "loss": 1.4922, "step": 83600 }, { "epoch": 2.4324402943828725, "grad_norm": 14.9375, "learning_rate": 1.8918662357194936e-06, "loss": 1.4578, "step": 83620 }, { "epoch": 2.43302207871542, "grad_norm": 13.8125, "learning_rate": 1.8899269540468787e-06, "loss": 1.4666, "step": 83640 }, { "epoch": 2.433603863047968, "grad_norm": 16.25, "learning_rate": 1.8879876723742638e-06, "loss": 1.5009, "step": 83660 }, { "epoch": 2.434185647380516, "grad_norm": 13.4375, "learning_rate": 1.8860483907016489e-06, "loss": 1.4532, "step": 83680 }, { "epoch": 2.434767431713064, "grad_norm": 18.5, "learning_rate": 1.884109109029034e-06, "loss": 1.4642, "step": 83700 }, { "epoch": 2.435349216045612, "grad_norm": 15.6875, "learning_rate": 1.8821698273564186e-06, "loss": 1.569, "step": 83720 }, { "epoch": 2.43593100037816, "grad_norm": 14.25, "learning_rate": 1.8802305456838037e-06, "loss": 1.5864, "step": 83740 }, { "epoch": 2.4365127847107075, "grad_norm": 11.0625, "learning_rate": 1.8782912640111888e-06, "loss": 1.5732, "step": 83760 }, { "epoch": 2.4370945690432557, "grad_norm": 14.375, "learning_rate": 1.876351982338574e-06, "loss": 1.4666, "step": 83780 }, { "epoch": 2.4376763533758035, "grad_norm": 12.375, "learning_rate": 1.874412700665959e-06, "loss": 1.4964, "step": 83800 }, { "epoch": 2.4382581377083516, "grad_norm": 12.375, "learning_rate": 1.8724734189933441e-06, "loss": 1.5458, "step": 83820 }, { "epoch": 2.4388399220408994, "grad_norm": 13.125, "learning_rate": 1.8705341373207292e-06, "loss": 1.522, "step": 83840 }, { "epoch": 2.439421706373447, "grad_norm": 14.875, "learning_rate": 1.8685948556481143e-06, "loss": 1.5432, "step": 83860 }, { "epoch": 2.4400034907059953, "grad_norm": 14.0, "learning_rate": 1.8666555739754994e-06, "loss": 1.4725, "step": 83880 }, { "epoch": 2.440585275038543, "grad_norm": 14.0, "learning_rate": 1.8647162923028845e-06, "loss": 1.5032, "step": 83900 }, { "epoch": 2.4411670593710912, "grad_norm": 15.375, "learning_rate": 1.8627770106302694e-06, "loss": 1.4386, "step": 83920 }, { "epoch": 2.441748843703639, "grad_norm": 10.5625, "learning_rate": 1.8608377289576545e-06, "loss": 1.5889, "step": 83940 }, { "epoch": 2.442330628036187, "grad_norm": 13.75, "learning_rate": 1.8588984472850396e-06, "loss": 1.5089, "step": 83960 }, { "epoch": 2.442912412368735, "grad_norm": 16.0, "learning_rate": 1.8569591656124247e-06, "loss": 1.5052, "step": 83980 }, { "epoch": 2.4434941967012827, "grad_norm": 14.125, "learning_rate": 1.8550198839398098e-06, "loss": 1.4531, "step": 84000 }, { "epoch": 2.444075981033831, "grad_norm": 14.5625, "learning_rate": 1.8530806022671949e-06, "loss": 1.5164, "step": 84020 }, { "epoch": 2.4446577653663786, "grad_norm": 14.1875, "learning_rate": 1.85114132059458e-06, "loss": 1.5387, "step": 84040 }, { "epoch": 2.4452395496989268, "grad_norm": 16.25, "learning_rate": 1.849202038921965e-06, "loss": 1.4059, "step": 84060 }, { "epoch": 2.4458213340314745, "grad_norm": 12.4375, "learning_rate": 1.8472627572493502e-06, "loss": 1.4368, "step": 84080 }, { "epoch": 2.4464031183640227, "grad_norm": 12.125, "learning_rate": 1.8453234755767353e-06, "loss": 1.461, "step": 84100 }, { "epoch": 2.4469849026965704, "grad_norm": 13.25, "learning_rate": 1.8433841939041204e-06, "loss": 1.5223, "step": 84120 }, { "epoch": 2.447566687029118, "grad_norm": 15.6875, "learning_rate": 1.8414449122315053e-06, "loss": 1.5211, "step": 84140 }, { "epoch": 2.4481484713616664, "grad_norm": 13.9375, "learning_rate": 1.8395056305588904e-06, "loss": 1.4521, "step": 84160 }, { "epoch": 2.448730255694214, "grad_norm": 14.25, "learning_rate": 1.8375663488862755e-06, "loss": 1.5413, "step": 84180 }, { "epoch": 2.4493120400267623, "grad_norm": 11.0, "learning_rate": 1.8356270672136605e-06, "loss": 1.5836, "step": 84200 }, { "epoch": 2.44989382435931, "grad_norm": 14.8125, "learning_rate": 1.8336877855410456e-06, "loss": 1.5283, "step": 84220 }, { "epoch": 2.4504756086918578, "grad_norm": 12.4375, "learning_rate": 1.8317485038684307e-06, "loss": 1.3703, "step": 84240 }, { "epoch": 2.451057393024406, "grad_norm": 13.5625, "learning_rate": 1.8298092221958158e-06, "loss": 1.5435, "step": 84260 }, { "epoch": 2.4516391773569537, "grad_norm": 13.875, "learning_rate": 1.827869940523201e-06, "loss": 1.4617, "step": 84280 }, { "epoch": 2.452220961689502, "grad_norm": 13.875, "learning_rate": 1.825930658850586e-06, "loss": 1.4998, "step": 84300 }, { "epoch": 2.4528027460220496, "grad_norm": 14.625, "learning_rate": 1.8239913771779711e-06, "loss": 1.5317, "step": 84320 }, { "epoch": 2.4533845303545974, "grad_norm": 13.5, "learning_rate": 1.822052095505356e-06, "loss": 1.4806, "step": 84340 }, { "epoch": 2.4539663146871455, "grad_norm": 14.8125, "learning_rate": 1.8201128138327411e-06, "loss": 1.5489, "step": 84360 }, { "epoch": 2.4545480990196933, "grad_norm": 11.6875, "learning_rate": 1.8181735321601262e-06, "loss": 1.466, "step": 84380 }, { "epoch": 2.4551298833522415, "grad_norm": 16.75, "learning_rate": 1.8162342504875113e-06, "loss": 1.4696, "step": 84400 }, { "epoch": 2.455711667684789, "grad_norm": 13.3125, "learning_rate": 1.8142949688148964e-06, "loss": 1.5239, "step": 84420 }, { "epoch": 2.456293452017337, "grad_norm": 13.5625, "learning_rate": 1.8123556871422815e-06, "loss": 1.5328, "step": 84440 }, { "epoch": 2.456875236349885, "grad_norm": 15.1875, "learning_rate": 1.8104164054696666e-06, "loss": 1.449, "step": 84460 }, { "epoch": 2.457457020682433, "grad_norm": 12.25, "learning_rate": 1.8084771237970517e-06, "loss": 1.527, "step": 84480 }, { "epoch": 2.458038805014981, "grad_norm": 11.3125, "learning_rate": 1.8065378421244368e-06, "loss": 1.5881, "step": 84500 }, { "epoch": 2.458620589347529, "grad_norm": 16.5, "learning_rate": 1.804598560451822e-06, "loss": 1.4317, "step": 84520 }, { "epoch": 2.4592023736800765, "grad_norm": 15.25, "learning_rate": 1.8026592787792068e-06, "loss": 1.4889, "step": 84540 }, { "epoch": 2.4597841580126247, "grad_norm": 14.25, "learning_rate": 1.8007199971065919e-06, "loss": 1.5569, "step": 84560 }, { "epoch": 2.4603659423451725, "grad_norm": 14.0625, "learning_rate": 1.798780715433977e-06, "loss": 1.4411, "step": 84580 }, { "epoch": 2.4609477266777207, "grad_norm": 13.0625, "learning_rate": 1.796841433761362e-06, "loss": 1.4549, "step": 84600 }, { "epoch": 2.4615295110102684, "grad_norm": 12.75, "learning_rate": 1.7949021520887472e-06, "loss": 1.5386, "step": 84620 }, { "epoch": 2.4621112953428166, "grad_norm": 14.0, "learning_rate": 1.7929628704161323e-06, "loss": 1.6189, "step": 84640 }, { "epoch": 2.4626930796753643, "grad_norm": 16.75, "learning_rate": 1.7910235887435174e-06, "loss": 1.5666, "step": 84660 }, { "epoch": 2.4632748640079125, "grad_norm": 12.125, "learning_rate": 1.7890843070709025e-06, "loss": 1.4781, "step": 84680 }, { "epoch": 2.4638566483404603, "grad_norm": 13.0, "learning_rate": 1.7871450253982876e-06, "loss": 1.4691, "step": 84700 }, { "epoch": 2.464438432673008, "grad_norm": 7.90625, "learning_rate": 1.7852057437256727e-06, "loss": 1.5353, "step": 84720 }, { "epoch": 2.465020217005556, "grad_norm": 12.5, "learning_rate": 1.7832664620530575e-06, "loss": 1.3974, "step": 84740 }, { "epoch": 2.465602001338104, "grad_norm": 13.4375, "learning_rate": 1.7813271803804426e-06, "loss": 1.4819, "step": 84760 }, { "epoch": 2.466183785670652, "grad_norm": 14.4375, "learning_rate": 1.7793878987078275e-06, "loss": 1.5848, "step": 84780 }, { "epoch": 2.4667655700032, "grad_norm": 11.625, "learning_rate": 1.7774486170352126e-06, "loss": 1.4865, "step": 84800 }, { "epoch": 2.4673473543357476, "grad_norm": 14.375, "learning_rate": 1.7755093353625977e-06, "loss": 1.4644, "step": 84820 }, { "epoch": 2.4679291386682958, "grad_norm": 14.4375, "learning_rate": 1.7735700536899828e-06, "loss": 1.4834, "step": 84840 }, { "epoch": 2.4685109230008435, "grad_norm": 14.6875, "learning_rate": 1.771630772017368e-06, "loss": 1.5563, "step": 84860 }, { "epoch": 2.4690927073333917, "grad_norm": 12.375, "learning_rate": 1.7696914903447528e-06, "loss": 1.5087, "step": 84880 }, { "epoch": 2.4696744916659394, "grad_norm": 14.0, "learning_rate": 1.767752208672138e-06, "loss": 1.4945, "step": 84900 }, { "epoch": 2.470256275998487, "grad_norm": 14.8125, "learning_rate": 1.765812926999523e-06, "loss": 1.4434, "step": 84920 }, { "epoch": 2.4708380603310354, "grad_norm": 15.75, "learning_rate": 1.763873645326908e-06, "loss": 1.5025, "step": 84940 }, { "epoch": 2.471419844663583, "grad_norm": 15.8125, "learning_rate": 1.7619343636542932e-06, "loss": 1.5365, "step": 84960 }, { "epoch": 2.4720016289961313, "grad_norm": 14.625, "learning_rate": 1.7599950819816783e-06, "loss": 1.4666, "step": 84980 }, { "epoch": 2.472583413328679, "grad_norm": 14.1875, "learning_rate": 1.7580558003090634e-06, "loss": 1.5651, "step": 85000 }, { "epoch": 2.4731651976612268, "grad_norm": 14.9375, "learning_rate": 1.7561165186364485e-06, "loss": 1.4765, "step": 85020 }, { "epoch": 2.473746981993775, "grad_norm": 12.3125, "learning_rate": 1.7541772369638336e-06, "loss": 1.4852, "step": 85040 }, { "epoch": 2.4743287663263227, "grad_norm": 12.1875, "learning_rate": 1.7522379552912187e-06, "loss": 1.5172, "step": 85060 }, { "epoch": 2.474910550658871, "grad_norm": 16.125, "learning_rate": 1.7502986736186036e-06, "loss": 1.5443, "step": 85080 }, { "epoch": 2.4754923349914186, "grad_norm": 14.3125, "learning_rate": 1.7483593919459887e-06, "loss": 1.4828, "step": 85100 }, { "epoch": 2.4760741193239664, "grad_norm": 15.0, "learning_rate": 1.7464201102733738e-06, "loss": 1.5358, "step": 85120 }, { "epoch": 2.4766559036565146, "grad_norm": 13.9375, "learning_rate": 1.7444808286007589e-06, "loss": 1.4983, "step": 85140 }, { "epoch": 2.4772376879890623, "grad_norm": 16.375, "learning_rate": 1.742541546928144e-06, "loss": 1.4259, "step": 85160 }, { "epoch": 2.4778194723216105, "grad_norm": 10.5, "learning_rate": 1.740602265255529e-06, "loss": 1.473, "step": 85180 }, { "epoch": 2.4784012566541582, "grad_norm": 13.4375, "learning_rate": 1.7386629835829142e-06, "loss": 1.5687, "step": 85200 }, { "epoch": 2.4789830409867064, "grad_norm": 13.1875, "learning_rate": 1.7367237019102992e-06, "loss": 1.3671, "step": 85220 }, { "epoch": 2.479564825319254, "grad_norm": 14.625, "learning_rate": 1.7347844202376843e-06, "loss": 1.4198, "step": 85240 }, { "epoch": 2.480146609651802, "grad_norm": 13.8125, "learning_rate": 1.7328451385650694e-06, "loss": 1.543, "step": 85260 }, { "epoch": 2.48072839398435, "grad_norm": 13.3125, "learning_rate": 1.7309058568924545e-06, "loss": 1.5451, "step": 85280 }, { "epoch": 2.481310178316898, "grad_norm": 16.25, "learning_rate": 1.7289665752198394e-06, "loss": 1.5445, "step": 85300 }, { "epoch": 2.481891962649446, "grad_norm": 14.6875, "learning_rate": 1.7270272935472245e-06, "loss": 1.5054, "step": 85320 }, { "epoch": 2.4824737469819937, "grad_norm": 12.125, "learning_rate": 1.7250880118746096e-06, "loss": 1.563, "step": 85340 }, { "epoch": 2.483055531314542, "grad_norm": 14.8125, "learning_rate": 1.7231487302019947e-06, "loss": 1.4879, "step": 85360 }, { "epoch": 2.4836373156470897, "grad_norm": 13.4375, "learning_rate": 1.7212094485293798e-06, "loss": 1.4319, "step": 85380 }, { "epoch": 2.4842190999796374, "grad_norm": 13.125, "learning_rate": 1.719270166856765e-06, "loss": 1.4411, "step": 85400 }, { "epoch": 2.4848008843121856, "grad_norm": 14.625, "learning_rate": 1.71733088518415e-06, "loss": 1.5097, "step": 85420 }, { "epoch": 2.4853826686447333, "grad_norm": 16.75, "learning_rate": 1.7153916035115351e-06, "loss": 1.524, "step": 85440 }, { "epoch": 2.4859644529772815, "grad_norm": 16.25, "learning_rate": 1.7134523218389202e-06, "loss": 1.5037, "step": 85460 }, { "epoch": 2.4865462373098293, "grad_norm": 12.6875, "learning_rate": 1.7115130401663053e-06, "loss": 1.5351, "step": 85480 }, { "epoch": 2.487128021642377, "grad_norm": 12.5625, "learning_rate": 1.7095737584936902e-06, "loss": 1.454, "step": 85500 }, { "epoch": 2.487709805974925, "grad_norm": 14.3125, "learning_rate": 1.7076344768210753e-06, "loss": 1.484, "step": 85520 }, { "epoch": 2.488291590307473, "grad_norm": 14.3125, "learning_rate": 1.7056951951484604e-06, "loss": 1.5233, "step": 85540 }, { "epoch": 2.488873374640021, "grad_norm": 11.625, "learning_rate": 1.7037559134758455e-06, "loss": 1.4799, "step": 85560 }, { "epoch": 2.489455158972569, "grad_norm": 14.4375, "learning_rate": 1.7018166318032306e-06, "loss": 1.4214, "step": 85580 }, { "epoch": 2.4900369433051166, "grad_norm": 13.8125, "learning_rate": 1.6998773501306157e-06, "loss": 1.3949, "step": 85600 }, { "epoch": 2.490618727637665, "grad_norm": 14.4375, "learning_rate": 1.6979380684580008e-06, "loss": 1.5463, "step": 85620 }, { "epoch": 2.4912005119702125, "grad_norm": 13.3125, "learning_rate": 1.6959987867853859e-06, "loss": 1.455, "step": 85640 }, { "epoch": 2.4917822963027607, "grad_norm": 16.25, "learning_rate": 1.694059505112771e-06, "loss": 1.4947, "step": 85660 }, { "epoch": 2.4923640806353085, "grad_norm": 10.6875, "learning_rate": 1.692120223440156e-06, "loss": 1.5354, "step": 85680 }, { "epoch": 2.492945864967856, "grad_norm": 13.5, "learning_rate": 1.690180941767541e-06, "loss": 1.5464, "step": 85700 }, { "epoch": 2.4935276493004044, "grad_norm": 13.8125, "learning_rate": 1.688241660094926e-06, "loss": 1.5747, "step": 85720 }, { "epoch": 2.494109433632952, "grad_norm": 13.375, "learning_rate": 1.6863023784223111e-06, "loss": 1.3954, "step": 85740 }, { "epoch": 2.4946912179655003, "grad_norm": 12.9375, "learning_rate": 1.6843630967496962e-06, "loss": 1.5419, "step": 85760 }, { "epoch": 2.495273002298048, "grad_norm": 13.9375, "learning_rate": 1.6824238150770813e-06, "loss": 1.4634, "step": 85780 }, { "epoch": 2.495854786630596, "grad_norm": 13.375, "learning_rate": 1.6804845334044664e-06, "loss": 1.5401, "step": 85800 }, { "epoch": 2.496436570963144, "grad_norm": 12.1875, "learning_rate": 1.6785452517318515e-06, "loss": 1.4788, "step": 85820 }, { "epoch": 2.4970183552956917, "grad_norm": 12.0625, "learning_rate": 1.6766059700592362e-06, "loss": 1.4634, "step": 85840 }, { "epoch": 2.49760013962824, "grad_norm": 15.875, "learning_rate": 1.6746666883866213e-06, "loss": 1.5184, "step": 85860 }, { "epoch": 2.4981819239607876, "grad_norm": 13.5, "learning_rate": 1.6727274067140064e-06, "loss": 1.5197, "step": 85880 }, { "epoch": 2.498763708293336, "grad_norm": 11.5625, "learning_rate": 1.6707881250413915e-06, "loss": 1.5003, "step": 85900 }, { "epoch": 2.4993454926258836, "grad_norm": 12.875, "learning_rate": 1.6688488433687766e-06, "loss": 1.4576, "step": 85920 }, { "epoch": 2.4999272769584318, "grad_norm": 12.875, "learning_rate": 1.6669095616961617e-06, "loss": 1.5049, "step": 85940 }, { "epoch": 2.5005090612909795, "grad_norm": 13.75, "learning_rate": 1.6649702800235468e-06, "loss": 1.5107, "step": 85960 }, { "epoch": 2.5010908456235272, "grad_norm": 12.125, "learning_rate": 1.663030998350932e-06, "loss": 1.4735, "step": 85980 }, { "epoch": 2.5016726299560754, "grad_norm": 16.5, "learning_rate": 1.661091716678317e-06, "loss": 1.4646, "step": 86000 }, { "epoch": 2.502254414288623, "grad_norm": 12.75, "learning_rate": 1.659152435005702e-06, "loss": 1.4567, "step": 86020 }, { "epoch": 2.5028361986211713, "grad_norm": 14.1875, "learning_rate": 1.657213153333087e-06, "loss": 1.4665, "step": 86040 }, { "epoch": 2.503417982953719, "grad_norm": 15.25, "learning_rate": 1.655273871660472e-06, "loss": 1.5428, "step": 86060 }, { "epoch": 2.503999767286267, "grad_norm": 11.8125, "learning_rate": 1.6533345899878572e-06, "loss": 1.5118, "step": 86080 }, { "epoch": 2.504581551618815, "grad_norm": 13.8125, "learning_rate": 1.6513953083152423e-06, "loss": 1.5431, "step": 86100 }, { "epoch": 2.5051633359513628, "grad_norm": 14.375, "learning_rate": 1.6494560266426274e-06, "loss": 1.5274, "step": 86120 }, { "epoch": 2.505745120283911, "grad_norm": 13.75, "learning_rate": 1.6475167449700125e-06, "loss": 1.5037, "step": 86140 }, { "epoch": 2.5063269046164587, "grad_norm": 15.5, "learning_rate": 1.6455774632973976e-06, "loss": 1.5036, "step": 86160 }, { "epoch": 2.5069086889490064, "grad_norm": 19.5, "learning_rate": 1.6436381816247827e-06, "loss": 1.4302, "step": 86180 }, { "epoch": 2.5074904732815546, "grad_norm": 11.75, "learning_rate": 1.6416988999521678e-06, "loss": 1.5004, "step": 86200 }, { "epoch": 2.5080722576141024, "grad_norm": 12.0, "learning_rate": 1.6397596182795529e-06, "loss": 1.4269, "step": 86220 }, { "epoch": 2.5086540419466505, "grad_norm": 12.4375, "learning_rate": 1.6378203366069377e-06, "loss": 1.4466, "step": 86240 }, { "epoch": 2.5092358262791983, "grad_norm": 13.875, "learning_rate": 1.6358810549343228e-06, "loss": 1.5068, "step": 86260 }, { "epoch": 2.509817610611746, "grad_norm": 14.75, "learning_rate": 1.633941773261708e-06, "loss": 1.4814, "step": 86280 }, { "epoch": 2.510399394944294, "grad_norm": 13.1875, "learning_rate": 1.632002491589093e-06, "loss": 1.4891, "step": 86300 }, { "epoch": 2.510981179276842, "grad_norm": 11.375, "learning_rate": 1.6300632099164781e-06, "loss": 1.5594, "step": 86320 }, { "epoch": 2.51156296360939, "grad_norm": 14.1875, "learning_rate": 1.6281239282438632e-06, "loss": 1.5167, "step": 86340 }, { "epoch": 2.512144747941938, "grad_norm": 14.0625, "learning_rate": 1.6261846465712483e-06, "loss": 1.5381, "step": 86360 }, { "epoch": 2.5127265322744856, "grad_norm": 13.1875, "learning_rate": 1.6242453648986334e-06, "loss": 1.445, "step": 86380 }, { "epoch": 2.513308316607034, "grad_norm": 11.9375, "learning_rate": 1.6223060832260185e-06, "loss": 1.5091, "step": 86400 }, { "epoch": 2.5138901009395815, "grad_norm": 13.5625, "learning_rate": 1.6203668015534036e-06, "loss": 1.5194, "step": 86420 }, { "epoch": 2.5144718852721297, "grad_norm": 16.375, "learning_rate": 1.6184275198807887e-06, "loss": 1.4995, "step": 86440 }, { "epoch": 2.5150536696046775, "grad_norm": 13.8125, "learning_rate": 1.6164882382081736e-06, "loss": 1.5071, "step": 86460 }, { "epoch": 2.515635453937225, "grad_norm": 10.1875, "learning_rate": 1.6145489565355587e-06, "loss": 1.4894, "step": 86480 }, { "epoch": 2.5162172382697734, "grad_norm": 11.9375, "learning_rate": 1.6126096748629438e-06, "loss": 1.5848, "step": 86500 }, { "epoch": 2.5167990226023216, "grad_norm": 13.4375, "learning_rate": 1.6106703931903289e-06, "loss": 1.5928, "step": 86520 }, { "epoch": 2.5173808069348693, "grad_norm": 12.4375, "learning_rate": 1.608731111517714e-06, "loss": 1.4975, "step": 86540 }, { "epoch": 2.517962591267417, "grad_norm": 14.0, "learning_rate": 1.606791829845099e-06, "loss": 1.5314, "step": 86560 }, { "epoch": 2.5185443755999652, "grad_norm": 14.4375, "learning_rate": 1.6048525481724842e-06, "loss": 1.4563, "step": 86580 }, { "epoch": 2.519126159932513, "grad_norm": 13.9375, "learning_rate": 1.6029132664998693e-06, "loss": 1.4916, "step": 86600 }, { "epoch": 2.519707944265061, "grad_norm": 16.375, "learning_rate": 1.6009739848272544e-06, "loss": 1.5425, "step": 86620 }, { "epoch": 2.520289728597609, "grad_norm": 13.5, "learning_rate": 1.5990347031546395e-06, "loss": 1.4995, "step": 86640 }, { "epoch": 2.5208715129301567, "grad_norm": 14.75, "learning_rate": 1.5970954214820244e-06, "loss": 1.4136, "step": 86660 }, { "epoch": 2.521453297262705, "grad_norm": 11.25, "learning_rate": 1.5951561398094095e-06, "loss": 1.5536, "step": 86680 }, { "epoch": 2.5220350815952526, "grad_norm": 11.0, "learning_rate": 1.5932168581367946e-06, "loss": 1.5162, "step": 86700 }, { "epoch": 2.5226168659278008, "grad_norm": 16.125, "learning_rate": 1.5912775764641797e-06, "loss": 1.5166, "step": 86720 }, { "epoch": 2.5231986502603485, "grad_norm": 10.4375, "learning_rate": 1.5893382947915648e-06, "loss": 1.4429, "step": 86740 }, { "epoch": 2.5237804345928962, "grad_norm": 17.5, "learning_rate": 1.5873990131189498e-06, "loss": 1.4458, "step": 86760 }, { "epoch": 2.5243622189254444, "grad_norm": 14.0625, "learning_rate": 1.585459731446335e-06, "loss": 1.4595, "step": 86780 }, { "epoch": 2.524944003257992, "grad_norm": 13.4375, "learning_rate": 1.58352044977372e-06, "loss": 1.5339, "step": 86800 }, { "epoch": 2.5255257875905404, "grad_norm": 14.5, "learning_rate": 1.5815811681011051e-06, "loss": 1.5444, "step": 86820 }, { "epoch": 2.526107571923088, "grad_norm": 16.5, "learning_rate": 1.5796418864284902e-06, "loss": 1.4465, "step": 86840 }, { "epoch": 2.526689356255636, "grad_norm": 14.6875, "learning_rate": 1.5777026047558751e-06, "loss": 1.5121, "step": 86860 }, { "epoch": 2.527271140588184, "grad_norm": 11.5625, "learning_rate": 1.5757633230832602e-06, "loss": 1.4736, "step": 86880 }, { "epoch": 2.5278529249207318, "grad_norm": 16.375, "learning_rate": 1.5738240414106451e-06, "loss": 1.546, "step": 86900 }, { "epoch": 2.52843470925328, "grad_norm": 13.5, "learning_rate": 1.5718847597380302e-06, "loss": 1.4419, "step": 86920 }, { "epoch": 2.5290164935858277, "grad_norm": 12.1875, "learning_rate": 1.5699454780654153e-06, "loss": 1.4853, "step": 86940 }, { "epoch": 2.5295982779183754, "grad_norm": 13.8125, "learning_rate": 1.5680061963928004e-06, "loss": 1.492, "step": 86960 }, { "epoch": 2.5301800622509236, "grad_norm": 14.0, "learning_rate": 1.5660669147201855e-06, "loss": 1.4927, "step": 86980 }, { "epoch": 2.5307618465834714, "grad_norm": 13.5625, "learning_rate": 1.5641276330475704e-06, "loss": 1.504, "step": 87000 }, { "epoch": 2.5313436309160195, "grad_norm": 14.0, "learning_rate": 1.5621883513749555e-06, "loss": 1.4816, "step": 87020 }, { "epoch": 2.5319254152485673, "grad_norm": 15.5625, "learning_rate": 1.5602490697023406e-06, "loss": 1.5125, "step": 87040 }, { "epoch": 2.532507199581115, "grad_norm": 11.4375, "learning_rate": 1.5583097880297257e-06, "loss": 1.5393, "step": 87060 }, { "epoch": 2.533088983913663, "grad_norm": 13.5625, "learning_rate": 1.5563705063571108e-06, "loss": 1.4948, "step": 87080 }, { "epoch": 2.5336707682462114, "grad_norm": 17.375, "learning_rate": 1.5544312246844959e-06, "loss": 1.5009, "step": 87100 }, { "epoch": 2.534252552578759, "grad_norm": 13.25, "learning_rate": 1.552491943011881e-06, "loss": 1.491, "step": 87120 }, { "epoch": 2.534834336911307, "grad_norm": 12.25, "learning_rate": 1.550552661339266e-06, "loss": 1.4954, "step": 87140 }, { "epoch": 2.5354161212438546, "grad_norm": 14.625, "learning_rate": 1.5486133796666512e-06, "loss": 1.4881, "step": 87160 }, { "epoch": 2.535997905576403, "grad_norm": 11.375, "learning_rate": 1.5466740979940363e-06, "loss": 1.4248, "step": 87180 }, { "epoch": 2.536579689908951, "grad_norm": 14.8125, "learning_rate": 1.5447348163214211e-06, "loss": 1.4026, "step": 87200 }, { "epoch": 2.5371614742414987, "grad_norm": 15.375, "learning_rate": 1.5427955346488062e-06, "loss": 1.5348, "step": 87220 }, { "epoch": 2.5377432585740465, "grad_norm": 13.9375, "learning_rate": 1.5408562529761913e-06, "loss": 1.4726, "step": 87240 }, { "epoch": 2.5383250429065947, "grad_norm": 11.9375, "learning_rate": 1.5389169713035764e-06, "loss": 1.4464, "step": 87260 }, { "epoch": 2.5389068272391424, "grad_norm": 11.625, "learning_rate": 1.5369776896309615e-06, "loss": 1.5215, "step": 87280 }, { "epoch": 2.5394886115716906, "grad_norm": 12.25, "learning_rate": 1.5350384079583466e-06, "loss": 1.5135, "step": 87300 }, { "epoch": 2.5400703959042383, "grad_norm": 14.25, "learning_rate": 1.5330991262857317e-06, "loss": 1.5343, "step": 87320 }, { "epoch": 2.540652180236786, "grad_norm": 13.8125, "learning_rate": 1.5311598446131168e-06, "loss": 1.4614, "step": 87340 }, { "epoch": 2.5412339645693343, "grad_norm": 12.5, "learning_rate": 1.529220562940502e-06, "loss": 1.3836, "step": 87360 }, { "epoch": 2.541815748901882, "grad_norm": 15.625, "learning_rate": 1.527281281267887e-06, "loss": 1.4345, "step": 87380 }, { "epoch": 2.54239753323443, "grad_norm": 12.0625, "learning_rate": 1.5253419995952721e-06, "loss": 1.5226, "step": 87400 }, { "epoch": 2.542979317566978, "grad_norm": 11.8125, "learning_rate": 1.523402717922657e-06, "loss": 1.4521, "step": 87420 }, { "epoch": 2.5435611018995257, "grad_norm": 13.0625, "learning_rate": 1.521463436250042e-06, "loss": 1.5146, "step": 87440 }, { "epoch": 2.544142886232074, "grad_norm": 11.6875, "learning_rate": 1.5195241545774272e-06, "loss": 1.6255, "step": 87460 }, { "epoch": 2.5447246705646216, "grad_norm": 14.125, "learning_rate": 1.5175848729048123e-06, "loss": 1.43, "step": 87480 }, { "epoch": 2.54530645489717, "grad_norm": 14.375, "learning_rate": 1.5156455912321974e-06, "loss": 1.4608, "step": 87500 }, { "epoch": 2.5458882392297175, "grad_norm": 12.0, "learning_rate": 1.5137063095595825e-06, "loss": 1.5352, "step": 87520 }, { "epoch": 2.5464700235622653, "grad_norm": 13.625, "learning_rate": 1.5117670278869676e-06, "loss": 1.575, "step": 87540 }, { "epoch": 2.5470518078948134, "grad_norm": 12.8125, "learning_rate": 1.5098277462143527e-06, "loss": 1.4455, "step": 87560 }, { "epoch": 2.547633592227361, "grad_norm": 12.5, "learning_rate": 1.5078884645417378e-06, "loss": 1.4836, "step": 87580 }, { "epoch": 2.5482153765599094, "grad_norm": 13.5625, "learning_rate": 1.5059491828691229e-06, "loss": 1.3722, "step": 87600 }, { "epoch": 2.548797160892457, "grad_norm": 13.125, "learning_rate": 1.5040099011965078e-06, "loss": 1.4748, "step": 87620 }, { "epoch": 2.549378945225005, "grad_norm": 11.875, "learning_rate": 1.5020706195238929e-06, "loss": 1.5758, "step": 87640 }, { "epoch": 2.549960729557553, "grad_norm": 16.875, "learning_rate": 1.500131337851278e-06, "loss": 1.5294, "step": 87660 }, { "epoch": 2.550542513890101, "grad_norm": 13.625, "learning_rate": 1.498192056178663e-06, "loss": 1.5163, "step": 87680 }, { "epoch": 2.551124298222649, "grad_norm": 15.125, "learning_rate": 1.4962527745060482e-06, "loss": 1.5919, "step": 87700 }, { "epoch": 2.5517060825551967, "grad_norm": 12.25, "learning_rate": 1.4943134928334333e-06, "loss": 1.5041, "step": 87720 }, { "epoch": 2.5522878668877444, "grad_norm": 15.8125, "learning_rate": 1.4923742111608184e-06, "loss": 1.4404, "step": 87740 }, { "epoch": 2.5528696512202926, "grad_norm": 11.75, "learning_rate": 1.4904349294882035e-06, "loss": 1.5303, "step": 87760 }, { "epoch": 2.553451435552841, "grad_norm": 12.8125, "learning_rate": 1.4884956478155885e-06, "loss": 1.5042, "step": 87780 }, { "epoch": 2.5540332198853886, "grad_norm": 14.5625, "learning_rate": 1.4865563661429736e-06, "loss": 1.4936, "step": 87800 }, { "epoch": 2.5546150042179363, "grad_norm": 16.25, "learning_rate": 1.4846170844703585e-06, "loss": 1.5642, "step": 87820 }, { "epoch": 2.5551967885504845, "grad_norm": 9.0625, "learning_rate": 1.4826778027977436e-06, "loss": 1.4549, "step": 87840 }, { "epoch": 2.5557785728830322, "grad_norm": 13.75, "learning_rate": 1.4807385211251287e-06, "loss": 1.514, "step": 87860 }, { "epoch": 2.5563603572155804, "grad_norm": 13.5, "learning_rate": 1.4787992394525138e-06, "loss": 1.4641, "step": 87880 }, { "epoch": 2.556942141548128, "grad_norm": 12.1875, "learning_rate": 1.476859957779899e-06, "loss": 1.5321, "step": 87900 }, { "epoch": 2.557523925880676, "grad_norm": 17.25, "learning_rate": 1.474920676107284e-06, "loss": 1.462, "step": 87920 }, { "epoch": 2.558105710213224, "grad_norm": 15.5625, "learning_rate": 1.4729813944346691e-06, "loss": 1.5143, "step": 87940 }, { "epoch": 2.558687494545772, "grad_norm": 12.5625, "learning_rate": 1.4710421127620538e-06, "loss": 1.4539, "step": 87960 }, { "epoch": 2.55926927887832, "grad_norm": 12.125, "learning_rate": 1.4691028310894389e-06, "loss": 1.451, "step": 87980 }, { "epoch": 2.5598510632108677, "grad_norm": 13.5, "learning_rate": 1.467163549416824e-06, "loss": 1.5119, "step": 88000 }, { "epoch": 2.5604328475434155, "grad_norm": 13.3125, "learning_rate": 1.465224267744209e-06, "loss": 1.4436, "step": 88020 }, { "epoch": 2.5610146318759637, "grad_norm": 11.5, "learning_rate": 1.4632849860715942e-06, "loss": 1.436, "step": 88040 }, { "epoch": 2.5615964162085114, "grad_norm": 12.1875, "learning_rate": 1.4613457043989793e-06, "loss": 1.4874, "step": 88060 }, { "epoch": 2.5621782005410596, "grad_norm": 16.25, "learning_rate": 1.4594064227263644e-06, "loss": 1.4848, "step": 88080 }, { "epoch": 2.5627599848736073, "grad_norm": 12.6875, "learning_rate": 1.4574671410537495e-06, "loss": 1.4135, "step": 88100 }, { "epoch": 2.563341769206155, "grad_norm": 13.625, "learning_rate": 1.4555278593811346e-06, "loss": 1.4105, "step": 88120 }, { "epoch": 2.5639235535387033, "grad_norm": 15.9375, "learning_rate": 1.4535885777085197e-06, "loss": 1.4694, "step": 88140 }, { "epoch": 2.564505337871251, "grad_norm": 13.5625, "learning_rate": 1.4516492960359046e-06, "loss": 1.4228, "step": 88160 }, { "epoch": 2.565087122203799, "grad_norm": 12.1875, "learning_rate": 1.4497100143632897e-06, "loss": 1.4681, "step": 88180 }, { "epoch": 2.565668906536347, "grad_norm": 13.375, "learning_rate": 1.4477707326906747e-06, "loss": 1.552, "step": 88200 }, { "epoch": 2.5662506908688947, "grad_norm": 10.75, "learning_rate": 1.4458314510180598e-06, "loss": 1.5275, "step": 88220 }, { "epoch": 2.566832475201443, "grad_norm": 13.0625, "learning_rate": 1.443892169345445e-06, "loss": 1.4954, "step": 88240 }, { "epoch": 2.5674142595339906, "grad_norm": 12.625, "learning_rate": 1.44195288767283e-06, "loss": 1.5454, "step": 88260 }, { "epoch": 2.567996043866539, "grad_norm": 10.6875, "learning_rate": 1.4400136060002151e-06, "loss": 1.4329, "step": 88280 }, { "epoch": 2.5685778281990865, "grad_norm": 13.1875, "learning_rate": 1.4380743243276002e-06, "loss": 1.4786, "step": 88300 }, { "epoch": 2.5691596125316343, "grad_norm": 15.0, "learning_rate": 1.4361350426549853e-06, "loss": 1.4678, "step": 88320 }, { "epoch": 2.5697413968641825, "grad_norm": 11.25, "learning_rate": 1.4341957609823704e-06, "loss": 1.5508, "step": 88340 }, { "epoch": 2.5703231811967306, "grad_norm": 11.0625, "learning_rate": 1.4322564793097553e-06, "loss": 1.4506, "step": 88360 }, { "epoch": 2.5709049655292784, "grad_norm": 12.0, "learning_rate": 1.4303171976371404e-06, "loss": 1.4891, "step": 88380 }, { "epoch": 2.571486749861826, "grad_norm": 15.125, "learning_rate": 1.4283779159645255e-06, "loss": 1.5108, "step": 88400 }, { "epoch": 2.5720685341943743, "grad_norm": 12.875, "learning_rate": 1.4264386342919106e-06, "loss": 1.4628, "step": 88420 }, { "epoch": 2.572650318526922, "grad_norm": 7.8125, "learning_rate": 1.4244993526192957e-06, "loss": 1.4989, "step": 88440 }, { "epoch": 2.5732321028594702, "grad_norm": 14.0, "learning_rate": 1.4225600709466808e-06, "loss": 1.5064, "step": 88460 }, { "epoch": 2.573813887192018, "grad_norm": 14.4375, "learning_rate": 1.420620789274066e-06, "loss": 1.4556, "step": 88480 }, { "epoch": 2.5743956715245657, "grad_norm": 16.25, "learning_rate": 1.418681507601451e-06, "loss": 1.3963, "step": 88500 }, { "epoch": 2.574977455857114, "grad_norm": 13.75, "learning_rate": 1.416742225928836e-06, "loss": 1.5654, "step": 88520 }, { "epoch": 2.5755592401896616, "grad_norm": 16.5, "learning_rate": 1.4148029442562212e-06, "loss": 1.5069, "step": 88540 }, { "epoch": 2.57614102452221, "grad_norm": 13.875, "learning_rate": 1.4128636625836063e-06, "loss": 1.5128, "step": 88560 }, { "epoch": 2.5767228088547576, "grad_norm": 13.5, "learning_rate": 1.4109243809109912e-06, "loss": 1.4906, "step": 88580 }, { "epoch": 2.5773045931873053, "grad_norm": 15.375, "learning_rate": 1.4089850992383763e-06, "loss": 1.5292, "step": 88600 }, { "epoch": 2.5778863775198535, "grad_norm": 15.75, "learning_rate": 1.4070458175657614e-06, "loss": 1.4152, "step": 88620 }, { "epoch": 2.5784681618524012, "grad_norm": 13.375, "learning_rate": 1.4051065358931465e-06, "loss": 1.4743, "step": 88640 }, { "epoch": 2.5790499461849494, "grad_norm": 13.8125, "learning_rate": 1.4031672542205316e-06, "loss": 1.5106, "step": 88660 }, { "epoch": 2.579631730517497, "grad_norm": 14.125, "learning_rate": 1.4012279725479167e-06, "loss": 1.4018, "step": 88680 }, { "epoch": 2.580213514850045, "grad_norm": 14.0, "learning_rate": 1.3992886908753018e-06, "loss": 1.5591, "step": 88700 }, { "epoch": 2.580795299182593, "grad_norm": 15.5625, "learning_rate": 1.3973494092026869e-06, "loss": 1.4992, "step": 88720 }, { "epoch": 2.581377083515141, "grad_norm": 12.125, "learning_rate": 1.395410127530072e-06, "loss": 1.478, "step": 88740 }, { "epoch": 2.581958867847689, "grad_norm": 12.25, "learning_rate": 1.393470845857457e-06, "loss": 1.4129, "step": 88760 }, { "epoch": 2.5825406521802368, "grad_norm": 17.75, "learning_rate": 1.391531564184842e-06, "loss": 1.4793, "step": 88780 }, { "epoch": 2.5831224365127845, "grad_norm": 13.5625, "learning_rate": 1.389592282512227e-06, "loss": 1.5708, "step": 88800 }, { "epoch": 2.5837042208453327, "grad_norm": 12.75, "learning_rate": 1.3876530008396121e-06, "loss": 1.5207, "step": 88820 }, { "epoch": 2.5842860051778804, "grad_norm": 13.5625, "learning_rate": 1.3857137191669972e-06, "loss": 1.5063, "step": 88840 }, { "epoch": 2.5848677895104286, "grad_norm": 12.0625, "learning_rate": 1.3837744374943823e-06, "loss": 1.5515, "step": 88860 }, { "epoch": 2.5854495738429764, "grad_norm": 14.9375, "learning_rate": 1.3818351558217674e-06, "loss": 1.5853, "step": 88880 }, { "epoch": 2.586031358175524, "grad_norm": 13.625, "learning_rate": 1.3798958741491525e-06, "loss": 1.569, "step": 88900 }, { "epoch": 2.5866131425080723, "grad_norm": 13.4375, "learning_rate": 1.3779565924765376e-06, "loss": 1.5617, "step": 88920 }, { "epoch": 2.58719492684062, "grad_norm": 13.0625, "learning_rate": 1.3760173108039227e-06, "loss": 1.4424, "step": 88940 }, { "epoch": 2.587776711173168, "grad_norm": 12.75, "learning_rate": 1.3740780291313078e-06, "loss": 1.5559, "step": 88960 }, { "epoch": 2.588358495505716, "grad_norm": 11.375, "learning_rate": 1.3721387474586927e-06, "loss": 1.5516, "step": 88980 }, { "epoch": 2.5889402798382637, "grad_norm": 12.625, "learning_rate": 1.3701994657860778e-06, "loss": 1.5243, "step": 89000 }, { "epoch": 2.589522064170812, "grad_norm": 14.0625, "learning_rate": 1.3682601841134627e-06, "loss": 1.4705, "step": 89020 }, { "epoch": 2.59010384850336, "grad_norm": 14.375, "learning_rate": 1.3663209024408478e-06, "loss": 1.4796, "step": 89040 }, { "epoch": 2.590685632835908, "grad_norm": 12.3125, "learning_rate": 1.3643816207682329e-06, "loss": 1.5771, "step": 89060 }, { "epoch": 2.5912674171684555, "grad_norm": 16.25, "learning_rate": 1.362442339095618e-06, "loss": 1.5066, "step": 89080 }, { "epoch": 2.5918492015010037, "grad_norm": 11.9375, "learning_rate": 1.360503057423003e-06, "loss": 1.5614, "step": 89100 }, { "epoch": 2.5924309858335515, "grad_norm": 15.5, "learning_rate": 1.358563775750388e-06, "loss": 1.5666, "step": 89120 }, { "epoch": 2.5930127701660997, "grad_norm": 14.9375, "learning_rate": 1.356624494077773e-06, "loss": 1.5544, "step": 89140 }, { "epoch": 2.5935945544986474, "grad_norm": 14.5, "learning_rate": 1.3546852124051582e-06, "loss": 1.4609, "step": 89160 }, { "epoch": 2.594176338831195, "grad_norm": 14.375, "learning_rate": 1.3527459307325433e-06, "loss": 1.4836, "step": 89180 }, { "epoch": 2.5947581231637433, "grad_norm": 8.625, "learning_rate": 1.3508066490599284e-06, "loss": 1.4095, "step": 89200 }, { "epoch": 2.595339907496291, "grad_norm": 17.875, "learning_rate": 1.3488673673873135e-06, "loss": 1.4704, "step": 89220 }, { "epoch": 2.5959216918288393, "grad_norm": 13.3125, "learning_rate": 1.3469280857146985e-06, "loss": 1.5322, "step": 89240 }, { "epoch": 2.596503476161387, "grad_norm": 13.125, "learning_rate": 1.3449888040420836e-06, "loss": 1.5451, "step": 89260 }, { "epoch": 2.5970852604939347, "grad_norm": 13.125, "learning_rate": 1.3430495223694687e-06, "loss": 1.5784, "step": 89280 }, { "epoch": 2.597667044826483, "grad_norm": 13.125, "learning_rate": 1.3411102406968538e-06, "loss": 1.5883, "step": 89300 }, { "epoch": 2.5982488291590307, "grad_norm": 15.375, "learning_rate": 1.3391709590242387e-06, "loss": 1.5509, "step": 89320 }, { "epoch": 2.598830613491579, "grad_norm": 10.75, "learning_rate": 1.3372316773516238e-06, "loss": 1.5002, "step": 89340 }, { "epoch": 2.5994123978241266, "grad_norm": 16.25, "learning_rate": 1.335292395679009e-06, "loss": 1.5081, "step": 89360 }, { "epoch": 2.5999941821566743, "grad_norm": 15.6875, "learning_rate": 1.333353114006394e-06, "loss": 1.4374, "step": 89380 }, { "epoch": 2.6005759664892225, "grad_norm": 13.5, "learning_rate": 1.3314138323337791e-06, "loss": 1.5208, "step": 89400 }, { "epoch": 2.6011577508217703, "grad_norm": 15.25, "learning_rate": 1.3294745506611642e-06, "loss": 1.5339, "step": 89420 }, { "epoch": 2.6017395351543184, "grad_norm": 12.3125, "learning_rate": 1.3275352689885493e-06, "loss": 1.4808, "step": 89440 }, { "epoch": 2.602321319486866, "grad_norm": 11.8125, "learning_rate": 1.3255959873159344e-06, "loss": 1.4434, "step": 89460 }, { "epoch": 2.602903103819414, "grad_norm": 15.0625, "learning_rate": 1.3236567056433195e-06, "loss": 1.4781, "step": 89480 }, { "epoch": 2.603484888151962, "grad_norm": 13.4375, "learning_rate": 1.3217174239707046e-06, "loss": 1.4987, "step": 89500 }, { "epoch": 2.60406667248451, "grad_norm": 16.625, "learning_rate": 1.3197781422980895e-06, "loss": 1.4983, "step": 89520 }, { "epoch": 2.604648456817058, "grad_norm": 10.9375, "learning_rate": 1.3178388606254746e-06, "loss": 1.5454, "step": 89540 }, { "epoch": 2.6052302411496058, "grad_norm": 12.75, "learning_rate": 1.3158995789528597e-06, "loss": 1.569, "step": 89560 }, { "epoch": 2.6058120254821535, "grad_norm": 14.0625, "learning_rate": 1.3139602972802448e-06, "loss": 1.4457, "step": 89580 }, { "epoch": 2.6063938098147017, "grad_norm": 13.8125, "learning_rate": 1.3120210156076299e-06, "loss": 1.5113, "step": 89600 }, { "epoch": 2.60697559414725, "grad_norm": 16.125, "learning_rate": 1.310081733935015e-06, "loss": 1.4794, "step": 89620 }, { "epoch": 2.6075573784797976, "grad_norm": 15.125, "learning_rate": 1.3081424522624e-06, "loss": 1.4272, "step": 89640 }, { "epoch": 2.6081391628123454, "grad_norm": 12.6875, "learning_rate": 1.3062031705897852e-06, "loss": 1.5364, "step": 89660 }, { "epoch": 2.6087209471448936, "grad_norm": 11.6875, "learning_rate": 1.3042638889171703e-06, "loss": 1.4851, "step": 89680 }, { "epoch": 2.6093027314774413, "grad_norm": 12.6875, "learning_rate": 1.3023246072445554e-06, "loss": 1.4141, "step": 89700 }, { "epoch": 2.6098845158099895, "grad_norm": 12.0625, "learning_rate": 1.3003853255719405e-06, "loss": 1.5233, "step": 89720 }, { "epoch": 2.610466300142537, "grad_norm": 12.4375, "learning_rate": 1.2984460438993253e-06, "loss": 1.4895, "step": 89740 }, { "epoch": 2.611048084475085, "grad_norm": 16.0, "learning_rate": 1.2965067622267104e-06, "loss": 1.4961, "step": 89760 }, { "epoch": 2.611629868807633, "grad_norm": 10.625, "learning_rate": 1.2945674805540955e-06, "loss": 1.5121, "step": 89780 }, { "epoch": 2.612211653140181, "grad_norm": 14.3125, "learning_rate": 1.2926281988814806e-06, "loss": 1.5176, "step": 89800 }, { "epoch": 2.612793437472729, "grad_norm": 14.625, "learning_rate": 1.2906889172088657e-06, "loss": 1.488, "step": 89820 }, { "epoch": 2.613375221805277, "grad_norm": 15.0, "learning_rate": 1.2887496355362508e-06, "loss": 1.5184, "step": 89840 }, { "epoch": 2.6139570061378246, "grad_norm": 14.1875, "learning_rate": 1.286810353863636e-06, "loss": 1.525, "step": 89860 }, { "epoch": 2.6145387904703727, "grad_norm": 12.375, "learning_rate": 1.284871072191021e-06, "loss": 1.4853, "step": 89880 }, { "epoch": 2.6151205748029205, "grad_norm": 12.1875, "learning_rate": 1.2829317905184061e-06, "loss": 1.5604, "step": 89900 }, { "epoch": 2.6157023591354687, "grad_norm": 15.1875, "learning_rate": 1.2809925088457912e-06, "loss": 1.5214, "step": 89920 }, { "epoch": 2.6162841434680164, "grad_norm": 15.25, "learning_rate": 1.2790532271731761e-06, "loss": 1.4771, "step": 89940 }, { "epoch": 2.616865927800564, "grad_norm": 14.8125, "learning_rate": 1.2771139455005612e-06, "loss": 1.414, "step": 89960 }, { "epoch": 2.6174477121331123, "grad_norm": 13.3125, "learning_rate": 1.2751746638279463e-06, "loss": 1.43, "step": 89980 }, { "epoch": 2.61802949646566, "grad_norm": 13.75, "learning_rate": 1.2732353821553314e-06, "loss": 1.3977, "step": 90000 }, { "epoch": 2.6186112807982083, "grad_norm": 12.9375, "learning_rate": 1.2712961004827165e-06, "loss": 1.491, "step": 90020 }, { "epoch": 2.619193065130756, "grad_norm": 14.25, "learning_rate": 1.2693568188101016e-06, "loss": 1.541, "step": 90040 }, { "epoch": 2.6197748494633037, "grad_norm": 13.875, "learning_rate": 1.2674175371374867e-06, "loss": 1.4521, "step": 90060 }, { "epoch": 2.620356633795852, "grad_norm": 13.1875, "learning_rate": 1.2654782554648714e-06, "loss": 1.5054, "step": 90080 }, { "epoch": 2.6209384181283997, "grad_norm": 11.5, "learning_rate": 1.2635389737922565e-06, "loss": 1.5068, "step": 90100 }, { "epoch": 2.621520202460948, "grad_norm": 15.1875, "learning_rate": 1.2615996921196416e-06, "loss": 1.4902, "step": 90120 }, { "epoch": 2.6221019867934956, "grad_norm": 12.5625, "learning_rate": 1.2596604104470267e-06, "loss": 1.3949, "step": 90140 }, { "epoch": 2.6226837711260433, "grad_norm": 13.0625, "learning_rate": 1.2577211287744118e-06, "loss": 1.4714, "step": 90160 }, { "epoch": 2.6232655554585915, "grad_norm": 14.5, "learning_rate": 1.2557818471017969e-06, "loss": 1.4378, "step": 90180 }, { "epoch": 2.6238473397911393, "grad_norm": 14.0, "learning_rate": 1.253842565429182e-06, "loss": 1.5399, "step": 90200 }, { "epoch": 2.6244291241236875, "grad_norm": 12.375, "learning_rate": 1.251903283756567e-06, "loss": 1.5688, "step": 90220 }, { "epoch": 2.625010908456235, "grad_norm": 13.1875, "learning_rate": 1.2499640020839522e-06, "loss": 1.4745, "step": 90240 }, { "epoch": 2.625592692788783, "grad_norm": 14.5625, "learning_rate": 1.2480247204113372e-06, "loss": 1.5646, "step": 90260 }, { "epoch": 2.626174477121331, "grad_norm": 12.625, "learning_rate": 1.2460854387387223e-06, "loss": 1.435, "step": 90280 }, { "epoch": 2.6267562614538793, "grad_norm": 15.8125, "learning_rate": 1.2441461570661074e-06, "loss": 1.558, "step": 90300 }, { "epoch": 2.627338045786427, "grad_norm": 12.875, "learning_rate": 1.2422068753934925e-06, "loss": 1.4167, "step": 90320 }, { "epoch": 2.627919830118975, "grad_norm": 15.25, "learning_rate": 1.2402675937208776e-06, "loss": 1.4858, "step": 90340 }, { "epoch": 2.628501614451523, "grad_norm": 12.8125, "learning_rate": 1.2383283120482625e-06, "loss": 1.4881, "step": 90360 }, { "epoch": 2.6290833987840707, "grad_norm": 15.75, "learning_rate": 1.2363890303756476e-06, "loss": 1.5406, "step": 90380 }, { "epoch": 2.629665183116619, "grad_norm": 13.0, "learning_rate": 1.2344497487030327e-06, "loss": 1.5149, "step": 90400 }, { "epoch": 2.6302469674491666, "grad_norm": 13.875, "learning_rate": 1.2325104670304178e-06, "loss": 1.5103, "step": 90420 }, { "epoch": 2.6308287517817144, "grad_norm": 14.5625, "learning_rate": 1.230571185357803e-06, "loss": 1.459, "step": 90440 }, { "epoch": 2.6314105361142626, "grad_norm": 16.0, "learning_rate": 1.228631903685188e-06, "loss": 1.4727, "step": 90460 }, { "epoch": 2.6319923204468103, "grad_norm": 14.5625, "learning_rate": 1.226692622012573e-06, "loss": 1.4068, "step": 90480 }, { "epoch": 2.6325741047793585, "grad_norm": 15.75, "learning_rate": 1.224753340339958e-06, "loss": 1.532, "step": 90500 }, { "epoch": 2.6331558891119062, "grad_norm": 14.0, "learning_rate": 1.222814058667343e-06, "loss": 1.5277, "step": 90520 }, { "epoch": 2.633737673444454, "grad_norm": 16.25, "learning_rate": 1.2208747769947282e-06, "loss": 1.5638, "step": 90540 }, { "epoch": 2.634319457777002, "grad_norm": 14.625, "learning_rate": 1.2189354953221133e-06, "loss": 1.4908, "step": 90560 }, { "epoch": 2.63490124210955, "grad_norm": 13.5625, "learning_rate": 1.2169962136494984e-06, "loss": 1.541, "step": 90580 }, { "epoch": 2.635483026442098, "grad_norm": 14.125, "learning_rate": 1.2150569319768835e-06, "loss": 1.5557, "step": 90600 }, { "epoch": 2.636064810774646, "grad_norm": 14.5625, "learning_rate": 1.2131176503042686e-06, "loss": 1.4827, "step": 90620 }, { "epoch": 2.6366465951071936, "grad_norm": 13.5625, "learning_rate": 1.2111783686316537e-06, "loss": 1.4529, "step": 90640 }, { "epoch": 2.6372283794397418, "grad_norm": 12.875, "learning_rate": 1.2092390869590388e-06, "loss": 1.5072, "step": 90660 }, { "epoch": 2.6378101637722895, "grad_norm": 12.4375, "learning_rate": 1.2072998052864237e-06, "loss": 1.4667, "step": 90680 }, { "epoch": 2.6383919481048377, "grad_norm": 15.625, "learning_rate": 1.2053605236138088e-06, "loss": 1.4551, "step": 90700 }, { "epoch": 2.6389737324373854, "grad_norm": 12.0, "learning_rate": 1.2034212419411939e-06, "loss": 1.4715, "step": 90720 }, { "epoch": 2.639555516769933, "grad_norm": 12.25, "learning_rate": 1.201481960268579e-06, "loss": 1.4934, "step": 90740 }, { "epoch": 2.6401373011024813, "grad_norm": 16.125, "learning_rate": 1.199542678595964e-06, "loss": 1.4717, "step": 90760 }, { "epoch": 2.640719085435029, "grad_norm": 12.5625, "learning_rate": 1.1976033969233491e-06, "loss": 1.4353, "step": 90780 }, { "epoch": 2.6413008697675773, "grad_norm": 10.0, "learning_rate": 1.1956641152507342e-06, "loss": 1.4902, "step": 90800 }, { "epoch": 2.641882654100125, "grad_norm": 13.0625, "learning_rate": 1.1937248335781193e-06, "loss": 1.4954, "step": 90820 }, { "epoch": 2.6424644384326728, "grad_norm": 12.8125, "learning_rate": 1.1917855519055044e-06, "loss": 1.4846, "step": 90840 }, { "epoch": 2.643046222765221, "grad_norm": 17.0, "learning_rate": 1.1898462702328895e-06, "loss": 1.5161, "step": 90860 }, { "epoch": 2.643628007097769, "grad_norm": 15.1875, "learning_rate": 1.1879069885602744e-06, "loss": 1.4884, "step": 90880 }, { "epoch": 2.644209791430317, "grad_norm": 17.75, "learning_rate": 1.1859677068876595e-06, "loss": 1.4544, "step": 90900 }, { "epoch": 2.6447915757628646, "grad_norm": 15.5625, "learning_rate": 1.1840284252150446e-06, "loss": 1.5529, "step": 90920 }, { "epoch": 2.645373360095413, "grad_norm": 13.625, "learning_rate": 1.1820891435424297e-06, "loss": 1.4614, "step": 90940 }, { "epoch": 2.6459551444279605, "grad_norm": 13.9375, "learning_rate": 1.1801498618698146e-06, "loss": 1.4789, "step": 90960 }, { "epoch": 2.6465369287605087, "grad_norm": 15.0, "learning_rate": 1.1782105801971997e-06, "loss": 1.5151, "step": 90980 }, { "epoch": 2.6471187130930565, "grad_norm": 13.5625, "learning_rate": 1.1762712985245848e-06, "loss": 1.4421, "step": 91000 }, { "epoch": 2.647700497425604, "grad_norm": 12.4375, "learning_rate": 1.1743320168519699e-06, "loss": 1.5315, "step": 91020 }, { "epoch": 2.6482822817581524, "grad_norm": 14.75, "learning_rate": 1.172392735179355e-06, "loss": 1.4751, "step": 91040 }, { "epoch": 2.6488640660907, "grad_norm": 15.8125, "learning_rate": 1.17045345350674e-06, "loss": 1.5342, "step": 91060 }, { "epoch": 2.6494458504232483, "grad_norm": 13.0625, "learning_rate": 1.1685141718341252e-06, "loss": 1.5897, "step": 91080 }, { "epoch": 2.650027634755796, "grad_norm": 15.0625, "learning_rate": 1.1665748901615103e-06, "loss": 1.4432, "step": 91100 }, { "epoch": 2.650609419088344, "grad_norm": 12.3125, "learning_rate": 1.1646356084888954e-06, "loss": 1.471, "step": 91120 }, { "epoch": 2.651191203420892, "grad_norm": 13.1875, "learning_rate": 1.1626963268162805e-06, "loss": 1.5025, "step": 91140 }, { "epoch": 2.6517729877534397, "grad_norm": 13.25, "learning_rate": 1.1607570451436654e-06, "loss": 1.5553, "step": 91160 }, { "epoch": 2.652354772085988, "grad_norm": 13.125, "learning_rate": 1.1588177634710505e-06, "loss": 1.4099, "step": 91180 }, { "epoch": 2.6529365564185357, "grad_norm": 13.5625, "learning_rate": 1.1568784817984356e-06, "loss": 1.5497, "step": 91200 }, { "epoch": 2.6535183407510834, "grad_norm": 15.5, "learning_rate": 1.1549392001258207e-06, "loss": 1.5543, "step": 91220 }, { "epoch": 2.6541001250836316, "grad_norm": 8.625, "learning_rate": 1.1529999184532058e-06, "loss": 1.4677, "step": 91240 }, { "epoch": 2.6546819094161793, "grad_norm": 15.125, "learning_rate": 1.1510606367805909e-06, "loss": 1.5021, "step": 91260 }, { "epoch": 2.6552636937487275, "grad_norm": 14.8125, "learning_rate": 1.149121355107976e-06, "loss": 1.5207, "step": 91280 }, { "epoch": 2.6558454780812752, "grad_norm": 14.625, "learning_rate": 1.147182073435361e-06, "loss": 1.5079, "step": 91300 }, { "epoch": 2.656427262413823, "grad_norm": 15.5625, "learning_rate": 1.1452427917627461e-06, "loss": 1.3869, "step": 91320 }, { "epoch": 2.657009046746371, "grad_norm": 10.875, "learning_rate": 1.1433035100901312e-06, "loss": 1.4704, "step": 91340 }, { "epoch": 2.657590831078919, "grad_norm": 14.9375, "learning_rate": 1.1413642284175163e-06, "loss": 1.5158, "step": 91360 }, { "epoch": 2.658172615411467, "grad_norm": 13.625, "learning_rate": 1.1394249467449012e-06, "loss": 1.4783, "step": 91380 }, { "epoch": 2.658754399744015, "grad_norm": 12.5, "learning_rate": 1.1374856650722863e-06, "loss": 1.4656, "step": 91400 }, { "epoch": 2.6593361840765626, "grad_norm": 16.625, "learning_rate": 1.1355463833996714e-06, "loss": 1.5551, "step": 91420 }, { "epoch": 2.6599179684091108, "grad_norm": 11.4375, "learning_rate": 1.1336071017270563e-06, "loss": 1.5305, "step": 91440 }, { "epoch": 2.6604997527416585, "grad_norm": 19.0, "learning_rate": 1.1316678200544414e-06, "loss": 1.5697, "step": 91460 }, { "epoch": 2.6610815370742067, "grad_norm": 10.625, "learning_rate": 1.1297285383818265e-06, "loss": 1.4515, "step": 91480 }, { "epoch": 2.6616633214067544, "grad_norm": 13.25, "learning_rate": 1.1277892567092116e-06, "loss": 1.5167, "step": 91500 }, { "epoch": 2.662245105739302, "grad_norm": 12.0625, "learning_rate": 1.1258499750365967e-06, "loss": 1.4129, "step": 91520 }, { "epoch": 2.6628268900718504, "grad_norm": 14.0625, "learning_rate": 1.1239106933639818e-06, "loss": 1.5106, "step": 91540 }, { "epoch": 2.6634086744043985, "grad_norm": 13.1875, "learning_rate": 1.1219714116913669e-06, "loss": 1.5203, "step": 91560 }, { "epoch": 2.6639904587369463, "grad_norm": 14.5, "learning_rate": 1.120032130018752e-06, "loss": 1.5161, "step": 91580 }, { "epoch": 2.664572243069494, "grad_norm": 14.125, "learning_rate": 1.118092848346137e-06, "loss": 1.5112, "step": 91600 }, { "epoch": 2.665154027402042, "grad_norm": 13.6875, "learning_rate": 1.1161535666735222e-06, "loss": 1.417, "step": 91620 }, { "epoch": 2.66573581173459, "grad_norm": 13.375, "learning_rate": 1.114214285000907e-06, "loss": 1.4804, "step": 91640 }, { "epoch": 2.666317596067138, "grad_norm": 11.5625, "learning_rate": 1.1122750033282922e-06, "loss": 1.4938, "step": 91660 }, { "epoch": 2.666899380399686, "grad_norm": 11.3125, "learning_rate": 1.1103357216556773e-06, "loss": 1.5055, "step": 91680 }, { "epoch": 2.6674811647322336, "grad_norm": 9.25, "learning_rate": 1.1083964399830624e-06, "loss": 1.4847, "step": 91700 }, { "epoch": 2.668062949064782, "grad_norm": 15.0, "learning_rate": 1.1064571583104475e-06, "loss": 1.438, "step": 91720 }, { "epoch": 2.6686447333973295, "grad_norm": 11.5625, "learning_rate": 1.1045178766378326e-06, "loss": 1.5066, "step": 91740 }, { "epoch": 2.6692265177298777, "grad_norm": 14.1875, "learning_rate": 1.1025785949652177e-06, "loss": 1.4833, "step": 91760 }, { "epoch": 2.6698083020624255, "grad_norm": 10.625, "learning_rate": 1.1006393132926028e-06, "loss": 1.509, "step": 91780 }, { "epoch": 2.670390086394973, "grad_norm": 16.625, "learning_rate": 1.0987000316199878e-06, "loss": 1.584, "step": 91800 }, { "epoch": 2.6709718707275214, "grad_norm": 15.1875, "learning_rate": 1.096760749947373e-06, "loss": 1.4688, "step": 91820 }, { "epoch": 2.671553655060069, "grad_norm": 15.625, "learning_rate": 1.0948214682747578e-06, "loss": 1.4821, "step": 91840 }, { "epoch": 2.6721354393926173, "grad_norm": 12.0625, "learning_rate": 1.092882186602143e-06, "loss": 1.4099, "step": 91860 }, { "epoch": 2.672717223725165, "grad_norm": 16.375, "learning_rate": 1.090942904929528e-06, "loss": 1.4567, "step": 91880 }, { "epoch": 2.673299008057713, "grad_norm": 14.125, "learning_rate": 1.0890036232569131e-06, "loss": 1.5113, "step": 91900 }, { "epoch": 2.673880792390261, "grad_norm": 13.6875, "learning_rate": 1.0870643415842982e-06, "loss": 1.4174, "step": 91920 }, { "epoch": 2.6744625767228087, "grad_norm": 15.875, "learning_rate": 1.0851250599116831e-06, "loss": 1.5172, "step": 91940 }, { "epoch": 2.675044361055357, "grad_norm": 13.1875, "learning_rate": 1.0831857782390682e-06, "loss": 1.5045, "step": 91960 }, { "epoch": 2.6756261453879047, "grad_norm": 14.8125, "learning_rate": 1.0812464965664533e-06, "loss": 1.4731, "step": 91980 }, { "epoch": 2.6762079297204524, "grad_norm": 13.9375, "learning_rate": 1.0793072148938384e-06, "loss": 1.4835, "step": 92000 }, { "epoch": 2.6767897140530006, "grad_norm": 12.3125, "learning_rate": 1.0773679332212235e-06, "loss": 1.4878, "step": 92020 }, { "epoch": 2.6773714983855483, "grad_norm": 14.375, "learning_rate": 1.0754286515486086e-06, "loss": 1.4873, "step": 92040 }, { "epoch": 2.6779532827180965, "grad_norm": 12.625, "learning_rate": 1.0734893698759937e-06, "loss": 1.5304, "step": 92060 }, { "epoch": 2.6785350670506443, "grad_norm": 11.8125, "learning_rate": 1.0715500882033788e-06, "loss": 1.4758, "step": 92080 }, { "epoch": 2.679116851383192, "grad_norm": 14.5625, "learning_rate": 1.0696108065307639e-06, "loss": 1.4777, "step": 92100 }, { "epoch": 2.67969863571574, "grad_norm": 11.75, "learning_rate": 1.0676715248581488e-06, "loss": 1.4517, "step": 92120 }, { "epoch": 2.6802804200482884, "grad_norm": 13.6875, "learning_rate": 1.0657322431855339e-06, "loss": 1.5198, "step": 92140 }, { "epoch": 2.680862204380836, "grad_norm": 14.0, "learning_rate": 1.063792961512919e-06, "loss": 1.4252, "step": 92160 }, { "epoch": 2.681443988713384, "grad_norm": 12.75, "learning_rate": 1.061853679840304e-06, "loss": 1.5404, "step": 92180 }, { "epoch": 2.682025773045932, "grad_norm": 12.3125, "learning_rate": 1.0599143981676892e-06, "loss": 1.4618, "step": 92200 }, { "epoch": 2.68260755737848, "grad_norm": 13.875, "learning_rate": 1.0579751164950743e-06, "loss": 1.6344, "step": 92220 }, { "epoch": 2.683189341711028, "grad_norm": 14.125, "learning_rate": 1.0560358348224594e-06, "loss": 1.3988, "step": 92240 }, { "epoch": 2.6837711260435757, "grad_norm": 12.375, "learning_rate": 1.0540965531498445e-06, "loss": 1.4478, "step": 92260 }, { "epoch": 2.6843529103761234, "grad_norm": 11.875, "learning_rate": 1.0521572714772296e-06, "loss": 1.4711, "step": 92280 }, { "epoch": 2.6849346947086716, "grad_norm": 16.75, "learning_rate": 1.0502179898046146e-06, "loss": 1.48, "step": 92300 }, { "epoch": 2.6855164790412194, "grad_norm": 11.25, "learning_rate": 1.0482787081319995e-06, "loss": 1.4759, "step": 92320 }, { "epoch": 2.6860982633737676, "grad_norm": 13.75, "learning_rate": 1.0463394264593846e-06, "loss": 1.4739, "step": 92340 }, { "epoch": 2.6866800477063153, "grad_norm": 13.375, "learning_rate": 1.0444001447867697e-06, "loss": 1.4706, "step": 92360 }, { "epoch": 2.687261832038863, "grad_norm": 12.5, "learning_rate": 1.0424608631141548e-06, "loss": 1.5112, "step": 92380 }, { "epoch": 2.6878436163714112, "grad_norm": 15.0625, "learning_rate": 1.04052158144154e-06, "loss": 1.5621, "step": 92400 }, { "epoch": 2.688425400703959, "grad_norm": 12.4375, "learning_rate": 1.038582299768925e-06, "loss": 1.5636, "step": 92420 }, { "epoch": 2.689007185036507, "grad_norm": 13.0625, "learning_rate": 1.0366430180963101e-06, "loss": 1.4564, "step": 92440 }, { "epoch": 2.689588969369055, "grad_norm": 12.125, "learning_rate": 1.0347037364236952e-06, "loss": 1.4961, "step": 92460 }, { "epoch": 2.6901707537016026, "grad_norm": 14.125, "learning_rate": 1.03276445475108e-06, "loss": 1.4921, "step": 92480 }, { "epoch": 2.690752538034151, "grad_norm": 12.6875, "learning_rate": 1.0308251730784652e-06, "loss": 1.5171, "step": 92500 }, { "epoch": 2.6913343223666986, "grad_norm": 12.25, "learning_rate": 1.0288858914058503e-06, "loss": 1.4865, "step": 92520 }, { "epoch": 2.6919161066992467, "grad_norm": 10.375, "learning_rate": 1.0269466097332354e-06, "loss": 1.4231, "step": 92540 }, { "epoch": 2.6924978910317945, "grad_norm": 17.0, "learning_rate": 1.0250073280606205e-06, "loss": 1.468, "step": 92560 }, { "epoch": 2.6930796753643422, "grad_norm": 10.8125, "learning_rate": 1.0230680463880056e-06, "loss": 1.5362, "step": 92580 }, { "epoch": 2.6936614596968904, "grad_norm": 11.625, "learning_rate": 1.0211287647153905e-06, "loss": 1.4689, "step": 92600 }, { "epoch": 2.694243244029438, "grad_norm": 14.0, "learning_rate": 1.0191894830427756e-06, "loss": 1.4739, "step": 92620 }, { "epoch": 2.6948250283619863, "grad_norm": 12.125, "learning_rate": 1.0172502013701607e-06, "loss": 1.4328, "step": 92640 }, { "epoch": 2.695406812694534, "grad_norm": 13.6875, "learning_rate": 1.0153109196975458e-06, "loss": 1.5491, "step": 92660 }, { "epoch": 2.695988597027082, "grad_norm": 12.125, "learning_rate": 1.0133716380249309e-06, "loss": 1.5193, "step": 92680 }, { "epoch": 2.69657038135963, "grad_norm": 12.6875, "learning_rate": 1.011432356352316e-06, "loss": 1.539, "step": 92700 }, { "epoch": 2.6971521656921777, "grad_norm": 13.125, "learning_rate": 1.009493074679701e-06, "loss": 1.4706, "step": 92720 }, { "epoch": 2.697733950024726, "grad_norm": 15.0, "learning_rate": 1.0075537930070862e-06, "loss": 1.6127, "step": 92740 }, { "epoch": 2.6983157343572737, "grad_norm": 12.1875, "learning_rate": 1.0056145113344713e-06, "loss": 1.6467, "step": 92760 }, { "epoch": 2.6988975186898214, "grad_norm": 13.9375, "learning_rate": 1.0036752296618564e-06, "loss": 1.5672, "step": 92780 }, { "epoch": 2.6994793030223696, "grad_norm": 12.0625, "learning_rate": 1.0017359479892412e-06, "loss": 1.4779, "step": 92800 }, { "epoch": 2.700061087354918, "grad_norm": 12.375, "learning_rate": 9.997966663166263e-07, "loss": 1.4741, "step": 92820 } ], "logging_steps": 20, "max_steps": 103131, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10314, "total_flos": 6.503393522895938e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }