{
  "best_metric": 4.111624240875244,
  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/full/lstm/1/checkpoints/checkpoint-610552",
  "epoch": 0.025000278439663435,
  "eval_steps": 10,
  "global_step": 610552,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.999998362119627e-05,
      "loss": 10.8213,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999161405248948e-05,
      "loss": 7.5645,
      "step": 512
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998322810497896e-05,
      "loss": 7.0515,
      "step": 1024
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997484215746844e-05,
      "loss": 6.9831,
      "step": 1536
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.996645620995792e-05,
      "loss": 6.9548,
      "step": 2048
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99580702624474e-05,
      "loss": 6.8802,
      "step": 2560
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994968431493688e-05,
      "loss": 6.736,
      "step": 3072
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994129836742636e-05,
      "loss": 6.6253,
      "step": 3584
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993291241991584e-05,
      "loss": 6.5411,
      "step": 4096
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.992452647240532e-05,
      "loss": 6.4495,
      "step": 4608
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99161405248948e-05,
      "loss": 6.4027,
      "step": 5120
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.990775457738428e-05,
      "loss": 6.3385,
      "step": 5632
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989936862987376e-05,
      "loss": 6.2752,
      "step": 6144
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989098268236324e-05,
      "loss": 6.2086,
      "step": 6656
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.988259673485272e-05,
      "loss": 6.1566,
      "step": 7168
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.98742107873422e-05,
      "loss": 6.1122,
      "step": 7680
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.986582483983168e-05,
      "loss": 6.0644,
      "step": 8192
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.985743889232116e-05,
      "loss": 6.0115,
      "step": 8704
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984905294481064e-05,
      "loss": 5.9764,
      "step": 9216
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984066699730012e-05,
      "loss": 5.9421,
      "step": 9728
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9832281049789595e-05,
      "loss": 5.8975,
      "step": 10240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.982391148108281e-05,
      "loss": 5.8621,
      "step": 10752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.981552553357229e-05,
      "loss": 5.8402,
      "step": 11264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.980713958606178e-05,
      "loss": 5.8026,
      "step": 11776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.979875363855125e-05,
      "loss": 5.7808,
      "step": 12288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9790400448648195e-05,
      "loss": 5.754,
      "step": 12800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9782014501137675e-05,
      "loss": 5.7313,
      "step": 13312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9773628553627155e-05,
      "loss": 5.7017,
      "step": 13824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9765242606116635e-05,
      "loss": 5.6683,
      "step": 14336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9756856658606115e-05,
      "loss": 5.6615,
      "step": 14848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9748470711095595e-05,
      "loss": 5.6285,
      "step": 15360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974008476358507e-05,
      "loss": 5.6122,
      "step": 15872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9731715194878284e-05,
      "loss": 5.586,
      "step": 16384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9723329247367764e-05,
      "loss": 5.5823,
      "step": 16896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9714943299857244e-05,
      "loss": 5.5499,
      "step": 17408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9706557352346724e-05,
      "loss": 5.5409,
      "step": 17920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.969818778363994e-05,
      "loss": 5.5269,
      "step": 18432
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.968980183612942e-05,
      "loss": 5.5069,
      "step": 18944
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96814158886189e-05,
      "loss": 5.4964,
      "step": 19456
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.967302994110837e-05,
      "loss": 5.4737,
      "step": 19968
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.966464399359785e-05,
      "loss": 5.4496,
      "step": 20480
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.965625804608733e-05,
      "loss": 5.4441,
      "step": 20992
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.964787209857681e-05,
      "loss": 5.4158,
      "step": 21504
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.963948615106629e-05,
      "loss": 5.4276,
      "step": 22016
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96311165823595e-05,
      "loss": 5.4002,
      "step": 22528
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.962273063484898e-05,
      "loss": 5.3954,
      "step": 23040
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96143610661422e-05,
      "loss": 5.3797,
      "step": 23552
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.960597511863168e-05,
      "loss": 5.3764,
      "step": 24064
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959758917112116e-05,
      "loss": 5.3695,
      "step": 24576
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958920322361064e-05,
      "loss": 5.3413,
      "step": 25088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958081727610012e-05,
      "loss": 5.3289,
      "step": 25600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.957244770739333e-05,
      "loss": 5.3212,
      "step": 26112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956406175988281e-05,
      "loss": 5.3278,
      "step": 26624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9555692191176016e-05,
      "loss": 5.3042,
      "step": 27136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9547306243665496e-05,
      "loss": 5.3119,
      "step": 27648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9538920296154976e-05,
      "loss": 5.2815,
      "step": 28160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9530534348644456e-05,
      "loss": 5.2791,
      "step": 28672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9522148401133936e-05,
      "loss": 5.2658,
      "step": 29184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951376245362342e-05,
      "loss": 5.2593,
      "step": 29696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.95053765061129e-05,
      "loss": 5.2565,
      "step": 30208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.949699055860238e-05,
      "loss": 5.2359,
      "step": 30720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948860461109186e-05,
      "loss": 5.2316,
      "step": 31232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948023504238507e-05,
      "loss": 5.2351,
      "step": 31744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.947184909487455e-05,
      "loss": 5.2133,
      "step": 32256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.946346314736403e-05,
      "loss": 5.2125,
      "step": 32768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.945509357865724e-05,
      "loss": 5.2008,
      "step": 33280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.944670763114672e-05,
      "loss": 5.2003,
      "step": 33792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.94383216836362e-05,
      "loss": 5.1851,
      "step": 34304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942993573612568e-05,
      "loss": 5.1879,
      "step": 34816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942154978861516e-05,
      "loss": 5.1751,
      "step": 35328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.941316384110464e-05,
      "loss": 5.1642,
      "step": 35840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.940477789359412e-05,
      "loss": 5.1667,
      "step": 36352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.93963919460836e-05,
      "loss": 5.1528,
      "step": 36864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9388022377376816e-05,
      "loss": 5.1613,
      "step": 37376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9379636429866296e-05,
      "loss": 5.1582,
      "step": 37888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9371266861159505e-05,
      "loss": 5.1489,
      "step": 38400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9362880913648985e-05,
      "loss": 5.1294,
      "step": 38912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9354494966138465e-05,
      "loss": 5.1139,
      "step": 39424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9346109018627945e-05,
      "loss": 5.1069,
      "step": 39936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9337723071117425e-05,
      "loss": 5.112,
      "step": 40448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9329337123606905e-05,
      "loss": 5.1168,
      "step": 40960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932095117609638e-05,
      "loss": 5.1102,
      "step": 41472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.931256522858586e-05,
      "loss": 5.0849,
      "step": 41984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.930417928107534e-05,
      "loss": 5.0786,
      "step": 42496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9295793333564825e-05,
      "loss": 5.0875,
      "step": 43008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9287407386054305e-05,
      "loss": 5.0727,
      "step": 43520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9279021438543785e-05,
      "loss": 5.0728,
      "step": 44032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9270651869836994e-05,
      "loss": 5.0653,
      "step": 44544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9262265922326474e-05,
      "loss": 5.0665,
      "step": 45056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9253879974815954e-05,
      "loss": 5.0551,
      "step": 45568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9245494027305433e-05,
      "loss": 5.047,
      "step": 46080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.923712445859864e-05,
      "loss": 5.0335,
      "step": 46592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922873851108812e-05,
      "loss": 5.047,
      "step": 47104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.92203525635776e-05,
      "loss": 5.0329,
      "step": 47616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.921196661606708e-05,
      "loss": 5.0162,
      "step": 48128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.920358066855656e-05,
      "loss": 5.0157,
      "step": 48640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.919519472104604e-05,
      "loss": 5.0278,
      "step": 49152
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918682515233926e-05,
      "loss": 5.0184,
      "step": 49664
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917843920482874e-05,
      "loss": 5.0099,
      "step": 50176
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917005325731822e-05,
      "loss": 4.9974,
      "step": 50688
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91616673098077e-05,
      "loss": 5.0021,
      "step": 51200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.915329774110091e-05,
      "loss": 4.9813,
      "step": 51712
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.914491179359039e-05,
      "loss": 4.9968,
      "step": 52224
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.913652584607987e-05,
      "loss": 4.9991,
      "step": 52736
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.912813989856935e-05,
      "loss": 4.9737,
      "step": 53248
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911975395105883e-05,
      "loss": 4.9676,
      "step": 53760
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911136800354831e-05,
      "loss": 4.9428,
      "step": 54272
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.910298205603779e-05,
      "loss": 4.9555,
      "step": 54784
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.909459610852727e-05,
      "loss": 4.9556,
      "step": 55296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9086226539820476e-05,
      "loss": 4.9508,
      "step": 55808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.907785697111369e-05,
      "loss": 4.9509,
      "step": 56320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906947102360317e-05,
      "loss": 4.9356,
      "step": 56832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906110145489638e-05,
      "loss": 4.9361,
      "step": 57344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.905271550738586e-05,
      "loss": 4.937,
      "step": 57856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.904432955987534e-05,
      "loss": 4.9393,
      "step": 58368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.903594361236482e-05,
      "loss": 4.9269,
      "step": 58880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90275576648543e-05,
      "loss": 4.9157,
      "step": 59392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.901917171734378e-05,
      "loss": 4.9165,
      "step": 59904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.901078576983326e-05,
      "loss": 4.92,
      "step": 60416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.900239982232274e-05,
      "loss": 4.9154,
      "step": 60928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8994013874812214e-05,
      "loss": 4.91,
      "step": 61440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.898564430610543e-05,
      "loss": 4.8992,
      "step": 61952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8977274737398646e-05,
      "loss": 4.8913,
      "step": 62464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8968888789888125e-05,
      "loss": 4.8854,
      "step": 62976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8960502842377605e-05,
      "loss": 4.8927,
      "step": 63488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8952116894867085e-05,
      "loss": 4.8904,
      "step": 64000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8943730947356565e-05,
      "loss": 4.8911,
      "step": 64512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.893534499984604e-05,
      "loss": 4.8653,
      "step": 65024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.892695905233552e-05,
      "loss": 4.8793,
      "step": 65536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8918573104825e-05,
      "loss": 4.8787,
      "step": 66048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.891018715731448e-05,
      "loss": 4.865,
      "step": 66560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.890181758860769e-05,
      "loss": 4.8807,
      "step": 67072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.889343164109717e-05,
      "loss": 4.8592,
      "step": 67584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888504569358665e-05,
      "loss": 4.8725,
      "step": 68096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8876659746076134e-05,
      "loss": 4.8632,
      "step": 68608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8868273798565614e-05,
      "loss": 4.8408,
      "step": 69120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.885990422985882e-05,
      "loss": 4.8402,
      "step": 69632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.88515182823483e-05,
      "loss": 4.8403,
      "step": 70144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.884313233483778e-05,
      "loss": 4.844,
      "step": 70656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.883474638732726e-05,
      "loss": 4.8483,
      "step": 71168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.882637681862047e-05,
      "loss": 4.8431,
      "step": 71680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.881799087110995e-05,
      "loss": 4.8381,
      "step": 72192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880960492359943e-05,
      "loss": 4.8284,
      "step": 72704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880121897608891e-05,
      "loss": 4.8163,
      "step": 73216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879283302857839e-05,
      "loss": 4.8157,
      "step": 73728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.878444708106787e-05,
      "loss": 4.8223,
      "step": 74240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.877606113355735e-05,
      "loss": 4.8298,
      "step": 74752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.876767518604683e-05,
      "loss": 4.8215,
      "step": 75264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875930561734005e-05,
      "loss": 4.804,
      "step": 75776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875091966982953e-05,
      "loss": 4.8036,
      "step": 76288
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.769171237945557,
      "eval_runtime": 589.0702,
      "eval_samples_per_second": 647.785,
      "eval_steps_per_second": 20.244,
      "step": 76319
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.874253372231901e-05,
      "loss": 4.8109,
      "step": 76800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.873414777480849e-05,
      "loss": 4.8026,
      "step": 77312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.872576182729797e-05,
      "loss": 4.7949,
      "step": 77824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.871737587978745e-05,
      "loss": 4.7949,
      "step": 78336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.870898993227693e-05,
      "loss": 4.7846,
      "step": 78848
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.87006039847664e-05,
      "loss": 4.7696,
      "step": 79360
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.869221803725588e-05,
      "loss": 4.7767,
      "step": 79872
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.868383208974536e-05,
      "loss": 4.7869,
      "step": 80384
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.867544614223484e-05,
      "loss": 4.7699,
      "step": 80896
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8667076573528056e-05,
      "loss": 4.7771,
      "step": 81408
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8658690626017536e-05,
      "loss": 4.7817,
      "step": 81920
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8650304678507016e-05,
      "loss": 4.7796,
      "step": 82432
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8641918730996496e-05,
      "loss": 4.7685,
      "step": 82944
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8633532783485976e-05,
      "loss": 4.7648,
      "step": 83456
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8625146835975456e-05,
      "loss": 4.7622,
      "step": 83968
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8616760888464936e-05,
      "loss": 4.7531,
      "step": 84480
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8608374940954416e-05,
      "loss": 4.7559,
      "step": 84992
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8599988993443896e-05,
      "loss": 4.7563,
      "step": 85504
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8591603045933376e-05,
      "loss": 4.7553,
      "step": 86016
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8583217098422856e-05,
      "loss": 4.7532,
      "step": 86528
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8574831150912336e-05,
      "loss": 4.7413,
      "step": 87040
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8566445203401816e-05,
      "loss": 4.7521,
      "step": 87552
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.855805925589129e-05,
      "loss": 4.7377,
      "step": 88064
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.854967330838077e-05,
      "loss": 4.7476,
      "step": 88576
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.8541287360870256e-05,
      "loss": 4.737,
      "step": 89088
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.85329341709672e-05,
      "loss": 4.7359,
      "step": 89600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.852454822345668e-05,
      "loss": 4.7376,
      "step": 90112
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.851616227594616e-05,
      "loss": 4.7198,
      "step": 90624
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.850777632843564e-05,
      "loss": 4.7336,
      "step": 91136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8499390380925114e-05,
      "loss": 4.714,
      "step": 91648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8491004433414594e-05,
      "loss": 4.7198,
      "step": 92160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8482618485904074e-05,
      "loss": 4.7157,
      "step": 92672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.847424891719729e-05,
      "loss": 4.7243,
      "step": 93184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.84658793484905e-05,
      "loss": 4.7119,
      "step": 93696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.845749340097998e-05,
      "loss": 4.7053,
      "step": 94208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.844910745346946e-05,
      "loss": 4.7219,
      "step": 94720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.844072150595894e-05,
      "loss": 4.7076,
      "step": 95232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.843233555844842e-05,
      "loss": 4.7151,
      "step": 95744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.84239496109379e-05,
      "loss": 4.7019,
      "step": 96256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.841556366342738e-05,
      "loss": 4.6875,
      "step": 96768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.840717771591686e-05,
      "loss": 4.6919,
      "step": 97280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.839880814721007e-05,
      "loss": 4.6831,
      "step": 97792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.839042219969955e-05,
      "loss": 4.6987,
      "step": 98304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.838203625218903e-05,
      "loss": 4.6872,
      "step": 98816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.837365030467851e-05,
      "loss": 4.6933,
      "step": 99328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.836526435716799e-05,
      "loss": 4.6852,
      "step": 99840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8356894788461196e-05,
      "loss": 4.6899,
      "step": 100352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8348508840950676e-05,
      "loss": 4.6865,
      "step": 100864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8340122893440156e-05,
      "loss": 4.6644,
      "step": 101376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.833173694592964e-05,
      "loss": 4.668,
      "step": 101888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.832336737722285e-05,
      "loss": 4.6661,
      "step": 102400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.831498142971233e-05,
      "loss": 4.6784,
      "step": 102912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.830659548220181e-05,
      "loss": 4.6562,
      "step": 103424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.829820953469129e-05,
      "loss": 4.6805,
      "step": 103936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.828982358718077e-05,
      "loss": 4.6559,
      "step": 104448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.828143763967025e-05,
      "loss": 4.6552,
      "step": 104960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.827305169215973e-05,
      "loss": 4.6579,
      "step": 105472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.826466574464921e-05,
      "loss": 4.6531,
      "step": 105984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.825627979713869e-05,
      "loss": 4.6541,
      "step": 106496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.824792660723563e-05,
      "loss": 4.6459,
      "step": 107008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.823954065972511e-05,
      "loss": 4.6447,
      "step": 107520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8231171091018326e-05,
      "loss": 4.6548,
      "step": 108032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8222785143507806e-05,
      "loss": 4.6412,
      "step": 108544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8214399195997286e-05,
      "loss": 4.6403,
      "step": 109056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8206013248486766e-05,
      "loss": 4.6374,
      "step": 109568
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8197627300976246e-05,
      "loss": 4.643,
      "step": 110080
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8189241353465726e-05,
      "loss": 4.6266,
      "step": 110592
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8180855405955206e-05,
      "loss": 4.6451,
      "step": 111104
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8172485837248415e-05,
      "loss": 4.6328,
      "step": 111616
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8164099889737895e-05,
      "loss": 4.6304,
      "step": 112128
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8155713942227375e-05,
      "loss": 4.6366,
      "step": 112640
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8147327994716854e-05,
      "loss": 4.6245,
      "step": 113152
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8138942047206334e-05,
      "loss": 4.6417,
      "step": 113664
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8130556099695814e-05,
      "loss": 4.6454,
      "step": 114176
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8122170152185294e-05,
      "loss": 4.6396,
      "step": 114688
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8113784204674774e-05,
      "loss": 4.6219,
      "step": 115200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8105398257164254e-05,
      "loss": 4.6139,
      "step": 115712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.8097012309653734e-05,
      "loss": 4.6101,
      "step": 116224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.808864274094695e-05,
      "loss": 4.6203,
      "step": 116736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.808027317224016e-05,
      "loss": 4.6318,
      "step": 117248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.807188722472964e-05,
      "loss": 4.6234,
      "step": 117760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.806350127721912e-05,
      "loss": 4.6095,
      "step": 118272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.80551153297086e-05,
      "loss": 4.5985,
      "step": 118784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.804672938219807e-05,
      "loss": 4.6168,
      "step": 119296
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.803834343468755e-05,
      "loss": 4.6017,
      "step": 119808
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.802995748717703e-05,
      "loss": 4.6049,
      "step": 120320
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.802157153966652e-05,
      "loss": 4.6079,
      "step": 120832
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.801320197095973e-05,
      "loss": 4.6147,
      "step": 121344
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.800481602344921e-05,
      "loss": 4.6023,
      "step": 121856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.799643007593869e-05,
      "loss": 4.5988,
      "step": 122368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.798804412842817e-05,
      "loss": 4.5903,
      "step": 122880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.797965818091765e-05,
      "loss": 4.6031,
      "step": 123392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.797128861221086e-05,
      "loss": 4.601,
      "step": 123904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.796290266470034e-05,
      "loss": 4.5775,
      "step": 124416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.795451671718982e-05,
      "loss": 4.5858,
      "step": 124928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.79461307696793e-05,
      "loss": 4.6014,
      "step": 125440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.793774482216878e-05,
      "loss": 4.6017,
      "step": 125952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.792935887465826e-05,
      "loss": 4.588,
      "step": 126464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.792098930595147e-05,
      "loss": 4.5824,
      "step": 126976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.791260335844095e-05,
      "loss": 4.5923,
      "step": 127488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.790421741093043e-05,
      "loss": 4.5731,
      "step": 128000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.789583146341991e-05,
      "loss": 4.5881,
      "step": 128512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.788744551590939e-05,
      "loss": 4.5934,
      "step": 129024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.78790759472026e-05,
      "loss": 4.5784,
      "step": 129536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.787068999969208e-05,
      "loss": 4.5772,
      "step": 130048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.786230405218156e-05,
      "loss": 4.5558,
      "step": 130560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.785393448347477e-05,
      "loss": 4.5621,
      "step": 131072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.784554853596425e-05,
      "loss": 4.5681,
      "step": 131584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.783716258845373e-05,
      "loss": 4.5696,
      "step": 132096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.782877664094321e-05,
      "loss": 4.5729,
      "step": 132608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.782039069343269e-05,
      "loss": 4.5607,
      "step": 133120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.781200474592217e-05,
      "loss": 4.5587,
      "step": 133632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.780361879841165e-05,
      "loss": 4.5656,
      "step": 134144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.779523285090114e-05,
      "loss": 4.5725,
      "step": 134656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.778684690339061e-05,
      "loss": 4.5609,
      "step": 135168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7778477334683826e-05,
      "loss": 4.5613,
      "step": 135680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7770091387173306e-05,
      "loss": 4.5575,
      "step": 136192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7761705439662786e-05,
      "loss": 4.561,
      "step": 136704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7753335870955995e-05,
      "loss": 4.5606,
      "step": 137216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7744949923445475e-05,
      "loss": 4.5546,
      "step": 137728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7736563975934955e-05,
      "loss": 4.5515,
      "step": 138240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7728178028424435e-05,
      "loss": 4.5472,
      "step": 138752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.771979208091391e-05,
      "loss": 4.5424,
      "step": 139264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.771140613340339e-05,
      "loss": 4.5481,
      "step": 139776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7703020185892875e-05,
      "loss": 4.5508,
      "step": 140288
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7694634238382355e-05,
      "loss": 4.5538,
      "step": 140800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7686248290871835e-05,
      "loss": 4.5356,
      "step": 141312
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7677862343361315e-05,
      "loss": 4.5436,
      "step": 141824
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7669476395850795e-05,
      "loss": 4.5556,
      "step": 142336
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.7661106827144004e-05,
      "loss": 4.5395,
      "step": 142848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.765273725843721e-05,
      "loss": 4.5519,
      "step": 143360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.764435131092669e-05,
      "loss": 4.5363,
      "step": 143872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.763596536341617e-05,
      "loss": 4.5547,
      "step": 144384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.762759579470938e-05,
      "loss": 4.5418,
      "step": 144896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.761920984719886e-05,
      "loss": 4.5279,
      "step": 145408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.761082389968834e-05,
      "loss": 4.5299,
      "step": 145920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.760243795217783e-05,
      "loss": 4.525,
      "step": 146432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.759405200466731e-05,
      "loss": 4.5383,
      "step": 146944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.758566605715679e-05,
      "loss": 4.5384,
      "step": 147456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.757728010964627e-05,
      "loss": 4.5395,
      "step": 147968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.756889416213575e-05,
      "loss": 4.5332,
      "step": 148480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.756050821462523e-05,
      "loss": 4.5258,
      "step": 148992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.755212226711471e-05,
      "loss": 4.5207,
      "step": 149504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.754373631960419e-05,
      "loss": 4.5162,
      "step": 150016
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.75353667508974e-05,
      "loss": 4.5269,
      "step": 150528
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.752698080338688e-05,
      "loss": 4.5398,
      "step": 151040
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.751859485587636e-05,
      "loss": 4.5253,
      "step": 151552
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.751020890836584e-05,
      "loss": 4.516,
      "step": 152064
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.750182296085532e-05,
      "loss": 4.5156,
      "step": 152576
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.485103130340576,
      "eval_runtime": 527.7119,
      "eval_samples_per_second": 723.105,
      "eval_steps_per_second": 22.598,
      "step": 152638
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.74934370133448e-05,
      "loss": 4.5263,
      "step": 153088
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.748505106583428e-05,
      "loss": 4.5219,
      "step": 153600
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.747668149712749e-05,
      "loss": 4.5095,
      "step": 154112
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.74683119284207e-05,
      "loss": 4.5124,
      "step": 154624
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.745992598091018e-05,
      "loss": 4.5079,
      "step": 155136
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.745154003339966e-05,
      "loss": 4.4944,
      "step": 155648
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.744315408588914e-05,
      "loss": 4.5033,
      "step": 156160
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.743478451718235e-05,
      "loss": 4.5144,
      "step": 156672
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.742639856967183e-05,
      "loss": 4.4941,
      "step": 157184
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.741802900096504e-05,
      "loss": 4.5058,
      "step": 157696
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.740964305345452e-05,
      "loss": 4.5157,
      "step": 158208
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.7401257105944e-05,
      "loss": 4.5131,
      "step": 158720
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.739287115843348e-05,
      "loss": 4.5003,
      "step": 159232
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.7384485210922966e-05,
      "loss": 4.5014,
      "step": 159744
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.7376099263412446e-05,
      "loss": 4.4991,
      "step": 160256
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.736771331590192e-05,
      "loss": 4.4937,
      "step": 160768
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.73593273683914e-05,
      "loss": 4.4923,
      "step": 161280
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.735094142088088e-05,
      "loss": 4.4985,
      "step": 161792
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.734255547337036e-05,
      "loss": 4.4993,
      "step": 162304
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.733416952585984e-05,
      "loss": 4.4986,
      "step": 162816
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.732578357834932e-05,
      "loss": 4.4894,
      "step": 163328
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.73173976308388e-05,
      "loss": 4.5022,
      "step": 163840
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.730902806213201e-05,
      "loss": 4.4888,
      "step": 164352
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.730064211462149e-05,
      "loss": 4.4955,
      "step": 164864
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.729225616711097e-05,
      "loss": 4.4897,
      "step": 165376
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.728387021960045e-05,
      "loss": 4.4892,
      "step": 165888
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.727548427208993e-05,
      "loss": 4.494,
      "step": 166400
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.7267098324579415e-05,
      "loss": 4.4764,
      "step": 166912
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.7258728755872624e-05,
      "loss": 4.4913,
      "step": 167424
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7250342808362104e-05,
      "loss": 4.4761,
      "step": 167936
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7241956860851584e-05,
      "loss": 4.4792,
      "step": 168448
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7233570913341064e-05,
      "loss": 4.4798,
      "step": 168960
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7225184965830544e-05,
      "loss": 4.4851,
      "step": 169472
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.721681539712375e-05,
      "loss": 4.4783,
      "step": 169984
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.720842944961323e-05,
      "loss": 4.4698,
      "step": 170496
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.720004350210271e-05,
      "loss": 4.4887,
      "step": 171008
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.719165755459219e-05,
      "loss": 4.4752,
      "step": 171520
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.718327160708167e-05,
      "loss": 4.4876,
      "step": 172032
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.717488565957115e-05,
      "loss": 4.4736,
      "step": 172544
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.716649971206063e-05,
      "loss": 4.4606,
      "step": 173056
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.715811376455011e-05,
      "loss": 4.4608,
      "step": 173568
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.714974419584333e-05,
      "loss": 4.4621,
      "step": 174080
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.714137462713654e-05,
      "loss": 4.4806,
      "step": 174592
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.713298867962602e-05,
      "loss": 4.4587,
      "step": 175104
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.71246027321155e-05,
      "loss": 4.4722,
      "step": 175616
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.711621678460498e-05,
      "loss": 4.4669,
      "step": 176128
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.710783083709446e-05,
      "loss": 4.4653,
      "step": 176640
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.709944488958393e-05,
      "loss": 4.4728,
      "step": 177152
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.709105894207341e-05,
      "loss": 4.4475,
      "step": 177664
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.708267299456289e-05,
      "loss": 4.4513,
      "step": 178176
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7074303425856106e-05,
      "loss": 4.4476,
      "step": 178688
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7065917478345586e-05,
      "loss": 4.4672,
      "step": 179200
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7057531530835066e-05,
      "loss": 4.4459,
      "step": 179712
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7049145583324546e-05,
      "loss": 4.4699,
      "step": 180224
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7040776014617755e-05,
      "loss": 4.4456,
      "step": 180736
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7032390067107235e-05,
      "loss": 4.4458,
      "step": 181248
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7024004119596715e-05,
      "loss": 4.4455,
      "step": 181760
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7015618172086195e-05,
      "loss": 4.4435,
      "step": 182272
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7007232224575675e-05,
      "loss": 4.4497,
      "step": 182784
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6998846277065155e-05,
      "loss": 4.4447,
      "step": 183296
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6990460329554635e-05,
      "loss": 4.4335,
      "step": 183808
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6982090760847844e-05,
      "loss": 4.4508,
      "step": 184320
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6973704813337324e-05,
      "loss": 4.438,
      "step": 184832
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.696533524463054e-05,
      "loss": 4.4411,
      "step": 185344
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.695694929712002e-05,
      "loss": 4.438,
      "step": 185856
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.69485633496095e-05,
      "loss": 4.4436,
      "step": 186368
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.694017740209898e-05,
      "loss": 4.4244,
      "step": 186880
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.693179145458846e-05,
      "loss": 4.4494,
      "step": 187392
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.692340550707794e-05,
      "loss": 4.4339,
      "step": 187904
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.691501955956742e-05,
      "loss": 4.4373,
      "step": 188416
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.69066336120569e-05,
      "loss": 4.4395,
      "step": 188928
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.689826404335011e-05,
      "loss": 4.4292,
      "step": 189440
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.688987809583959e-05,
      "loss": 4.4479,
      "step": 189952
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.68815085271328e-05,
      "loss": 4.4521,
      "step": 190464
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.687312257962228e-05,
      "loss": 4.4497,
      "step": 190976
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6864753010915494e-05,
      "loss": 4.4338,
      "step": 191488
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6856367063404974e-05,
      "loss": 4.4249,
      "step": 192000
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6847981115894454e-05,
      "loss": 4.4181,
      "step": 192512
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6839595168383934e-05,
      "loss": 4.4363,
      "step": 193024
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6831209220873414e-05,
      "loss": 4.4439,
      "step": 193536
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6822823273362894e-05,
      "loss": 4.4378,
      "step": 194048
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6814437325852373e-05,
      "loss": 4.4247,
      "step": 194560
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6806051378341853e-05,
      "loss": 4.4157,
      "step": 195072
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6797665430831333e-05,
      "loss": 4.4293,
      "step": 195584
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.678927948332081e-05,
      "loss": 4.4159,
      "step": 196096
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.678090991461402e-05,
      "loss": 4.421,
      "step": 196608
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.67725239671035e-05,
      "loss": 4.429,
      "step": 197120
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.676413801959298e-05,
      "loss": 4.4286,
      "step": 197632
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.675575207208246e-05,
      "loss": 4.4235,
      "step": 198144
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.674736612457194e-05,
      "loss": 4.4243,
      "step": 198656
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.673899655586516e-05,
      "loss": 4.4129,
      "step": 199168
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.673061060835464e-05,
      "loss": 4.4243,
      "step": 199680
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.672222466084412e-05,
      "loss": 4.4212,
      "step": 200192
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.671383871333359e-05,
      "loss": 4.406,
      "step": 200704
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.670546914462681e-05,
      "loss": 4.4093,
      "step": 201216
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.669708319711629e-05,
      "loss": 4.4255,
      "step": 201728
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.668869724960576e-05,
      "loss": 4.4273,
      "step": 202240
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.668031130209524e-05,
      "loss": 4.4122,
      "step": 202752
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6671941733388456e-05,
      "loss": 4.4134,
      "step": 203264
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6663555785877936e-05,
      "loss": 4.4193,
      "step": 203776
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6655169838367416e-05,
      "loss": 4.4022,
      "step": 204288
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6646783890856896e-05,
      "loss": 4.4186,
      "step": 204800
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6638397943346376e-05,
      "loss": 4.4239,
      "step": 205312
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.663002837463959e-05,
      "loss": 4.4078,
      "step": 205824
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6621642427129065e-05,
      "loss": 4.4144,
      "step": 206336
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6613256479618545e-05,
      "loss": 4.3884,
      "step": 206848
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6604870532108025e-05,
      "loss": 4.3918,
      "step": 207360
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6596484584597505e-05,
      "loss": 4.4015,
      "step": 207872
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6588098637086985e-05,
      "loss": 4.4042,
      "step": 208384
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6579712689576465e-05,
      "loss": 4.4107,
      "step": 208896
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6571326742065945e-05,
      "loss": 4.3957,
      "step": 209408
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6562957173359154e-05,
      "loss": 4.3936,
      "step": 209920
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.655458760465237e-05,
      "loss": 4.3993,
      "step": 210432
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.654620165714185e-05,
      "loss": 4.4114,
      "step": 210944
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.653781570963133e-05,
      "loss": 4.4021,
      "step": 211456
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.652942976212081e-05,
      "loss": 4.4048,
      "step": 211968
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.652104381461029e-05,
      "loss": 4.3946,
      "step": 212480
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.651265786709977e-05,
      "loss": 4.3957,
      "step": 212992
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.650428829839298e-05,
      "loss": 4.4068,
      "step": 213504
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.649590235088246e-05,
      "loss": 4.3947,
      "step": 214016
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.648751640337194e-05,
      "loss": 4.3941,
      "step": 214528
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.647913045586142e-05,
      "loss": 4.3924,
      "step": 215040
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.64707445083509e-05,
      "loss": 4.3842,
      "step": 215552
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.646235856084038e-05,
      "loss": 4.3928,
      "step": 216064
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.645397261332986e-05,
      "loss": 4.3928,
      "step": 216576
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.644558666581934e-05,
      "loss": 4.4017,
      "step": 217088
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6437217097112554e-05,
      "loss": 4.3809,
      "step": 217600
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.6428831149602034e-05,
      "loss": 4.3875,
      "step": 218112
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.642046158089524e-05,
      "loss": 4.4029,
      "step": 218624
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.641207563338472e-05,
      "loss": 4.385,
      "step": 219136
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.64036896858742e-05,
      "loss": 4.4004,
      "step": 219648
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.639530373836368e-05,
      "loss": 4.3869,
      "step": 220160
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.638691779085316e-05,
      "loss": 4.4033,
      "step": 220672
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.637853184334264e-05,
      "loss": 4.3887,
      "step": 221184
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.637014589583212e-05,
      "loss": 4.3809,
      "step": 221696
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.636179270592906e-05,
      "loss": 4.3798,
      "step": 222208
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.635340675841854e-05,
      "loss": 4.3751,
      "step": 222720
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.634502081090802e-05,
      "loss": 4.3877,
      "step": 223232
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.633663486339751e-05,
      "loss": 4.3948,
      "step": 223744
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.632824891588699e-05,
      "loss": 4.3892,
      "step": 224256
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.631986296837647e-05,
      "loss": 4.3858,
      "step": 224768
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.631147702086595e-05,
      "loss": 4.3795,
      "step": 225280
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.630309107335542e-05,
      "loss": 4.3773,
      "step": 225792
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.62947051258449e-05,
      "loss": 4.3714,
      "step": 226304
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.628631917833438e-05,
      "loss": 4.384,
      "step": 226816
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.627793323082386e-05,
      "loss": 4.3931,
      "step": 227328
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.626956366211707e-05,
      "loss": 4.3819,
      "step": 227840
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.626117771460655e-05,
      "loss": 4.3746,
      "step": 228352
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.625279176709603e-05,
      "loss": 4.3728,
      "step": 228864
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.3495965003967285,
      "eval_runtime": 528.0293,
      "eval_samples_per_second": 722.67,
      "eval_steps_per_second": 22.584,
      "step": 228957
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.624440581958551e-05,
      "loss": 4.3835,
      "step": 229376
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.623601987207499e-05,
      "loss": 4.3841,
      "step": 229888
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6227633924564476e-05,
      "loss": 4.3702,
      "step": 230400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6219247977053956e-05,
      "loss": 4.3685,
      "step": 230912
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6210862029543436e-05,
      "loss": 4.3712,
      "step": 231424
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6202476082032916e-05,
      "loss": 4.3529,
      "step": 231936
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6194090134522396e-05,
      "loss": 4.3629,
      "step": 232448
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6185704187011876e-05,
      "loss": 4.3723,
      "step": 232960
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6177318239501356e-05,
      "loss": 4.3615,
      "step": 233472
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6168932291990836e-05,
      "loss": 4.3695,
      "step": 233984
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.616054634448031e-05,
      "loss": 4.3765,
      "step": 234496
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.615216039696979e-05,
      "loss": 4.373,
      "step": 235008
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.614377444945927e-05,
      "loss": 4.3642,
      "step": 235520
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.613538850194875e-05,
      "loss": 4.3695,
      "step": 236032
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.612700255443823e-05,
      "loss": 4.3618,
      "step": 236544
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.611861660692771e-05,
      "loss": 4.3621,
      "step": 237056
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.611023065941719e-05,
      "loss": 4.3548,
      "step": 237568
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6101861090710405e-05,
      "loss": 4.3633,
      "step": 238080
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6093475143199885e-05,
      "loss": 4.3644,
      "step": 238592
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6085089195689365e-05,
      "loss": 4.3678,
      "step": 239104
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6076703248178845e-05,
      "loss": 4.3579,
      "step": 239616
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6068317300668325e-05,
      "loss": 4.3697,
      "step": 240128
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6059931353157805e-05,
      "loss": 4.3595,
      "step": 240640
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6051545405647285e-05,
      "loss": 4.3603,
      "step": 241152
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6043159458136764e-05,
      "loss": 4.36,
      "step": 241664
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6034789889429974e-05,
      "loss": 4.3574,
      "step": 242176
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.6026403941919454e-05,
      "loss": 4.3632,
      "step": 242688
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.601803437321266e-05,
      "loss": 4.3485,
      "step": 243200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.600964842570214e-05,
      "loss": 4.3636,
      "step": 243712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.600126247819162e-05,
      "loss": 4.3482,
      "step": 244224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.599287653068111e-05,
      "loss": 4.3511,
      "step": 244736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.598449058317059e-05,
      "loss": 4.3541,
      "step": 245248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.59761210144638e-05,
      "loss": 4.3607,
      "step": 245760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.596773506695328e-05,
      "loss": 4.3465,
      "step": 246272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.595934911944276e-05,
      "loss": 4.3492,
      "step": 246784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.595097955073597e-05,
      "loss": 4.3564,
      "step": 247296
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.594259360322545e-05,
      "loss": 4.351,
      "step": 247808
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.593420765571493e-05,
      "loss": 4.3661,
      "step": 248320
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.592582170820441e-05,
      "loss": 4.3496,
      "step": 248832
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.591743576069389e-05,
      "loss": 4.3374,
      "step": 249344
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.590904981318337e-05,
      "loss": 4.3399,
      "step": 249856
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.590066386567285e-05,
      "loss": 4.3353,
      "step": 250368
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.589227791816233e-05,
      "loss": 4.3579,
      "step": 250880
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.588389197065181e-05,
      "loss": 4.3389,
      "step": 251392
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.587550602314129e-05,
      "loss": 4.3493,
      "step": 251904
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.586712007563077e-05,
      "loss": 4.3448,
      "step": 252416
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.585873412812025e-05,
      "loss": 4.3442,
      "step": 252928
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.585038093821719e-05,
      "loss": 4.3525,
      "step": 253440
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.584199499070667e-05,
      "loss": 4.3274,
      "step": 253952
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5833609043196145e-05,
      "loss": 4.3281,
      "step": 254464
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5825223095685625e-05,
      "loss": 4.3346,
      "step": 254976
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5816837148175105e-05,
      "loss": 4.3477,
      "step": 255488
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.580846757946832e-05,
      "loss": 4.3264,
      "step": 256000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.58000816319578e-05,
      "loss": 4.3472,
      "step": 256512
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5791712063251017e-05,
      "loss": 4.3282,
      "step": 257024
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5783326115740497e-05,
      "loss": 4.3255,
      "step": 257536
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.577494016822997e-05,
      "loss": 4.3309,
      "step": 258048
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.576655422071945e-05,
      "loss": 4.3202,
      "step": 258560
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.575816827320893e-05,
      "loss": 4.3388,
      "step": 259072
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5749798704502146e-05,
      "loss": 4.3295,
      "step": 259584
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.574141275699162e-05,
      "loss": 4.3151,
      "step": 260096
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.57330268094811e-05,
      "loss": 4.3348,
      "step": 260608
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.572464086197058e-05,
      "loss": 4.3234,
      "step": 261120
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.571625491446006e-05,
      "loss": 4.3292,
      "step": 261632
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.570786896694954e-05,
      "loss": 4.3206,
      "step": 262144
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.569948301943902e-05,
      "loss": 4.3318,
      "step": 262656
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.56910970719285e-05,
      "loss": 4.3128,
      "step": 263168
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.568271112441798e-05,
      "loss": 4.3357,
      "step": 263680
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5674325176907465e-05,
      "loss": 4.3195,
      "step": 264192
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5665939229396945e-05,
      "loss": 4.3199,
      "step": 264704
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5657553281886425e-05,
      "loss": 4.3279,
      "step": 265216
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5649183713179634e-05,
      "loss": 4.3235,
      "step": 265728
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5640797765669114e-05,
      "loss": 4.331,
      "step": 266240
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5632411818158594e-05,
      "loss": 4.342,
      "step": 266752
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.5624025870648074e-05,
      "loss": 4.3382,
      "step": 267264
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.561565630194128e-05,
      "loss": 4.3233,
      "step": 267776
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.560727035443076e-05,
      "loss": 4.317,
      "step": 268288
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.559888440692024e-05,
      "loss": 4.3097,
      "step": 268800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.559049845940972e-05,
      "loss": 4.327,
      "step": 269312
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.558212889070293e-05,
      "loss": 4.3345,
      "step": 269824
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.557374294319242e-05,
      "loss": 4.3293,
      "step": 270336
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.55653569956819e-05,
      "loss": 4.3153,
      "step": 270848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.555697104817138e-05,
      "loss": 4.3061,
      "step": 271360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.554860147946459e-05,
      "loss": 4.3241,
      "step": 271872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.554021553195407e-05,
      "loss": 4.3027,
      "step": 272384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.553182958444355e-05,
      "loss": 4.3173,
      "step": 272896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.552344363693303e-05,
      "loss": 4.3184,
      "step": 273408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.551505768942251e-05,
      "loss": 4.3222,
      "step": 273920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.550667174191198e-05,
      "loss": 4.3176,
      "step": 274432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.549828579440146e-05,
      "loss": 4.3239,
      "step": 274944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.548989984689094e-05,
      "loss": 4.3044,
      "step": 275456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.548151389938042e-05,
      "loss": 4.3166,
      "step": 275968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.54731279518699e-05,
      "loss": 4.3141,
      "step": 276480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5464758383163116e-05,
      "loss": 4.3054,
      "step": 276992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5456372435652596e-05,
      "loss": 4.3035,
      "step": 277504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5447986488142076e-05,
      "loss": 4.3211,
      "step": 278016
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5439600540631556e-05,
      "loss": 4.319,
      "step": 278528
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5431214593121036e-05,
      "loss": 4.3152,
      "step": 279040
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5422828645610516e-05,
      "loss": 4.3086,
      "step": 279552
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5414459076903725e-05,
      "loss": 4.319,
      "step": 280064
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5406073129393205e-05,
      "loss": 4.3022,
      "step": 280576
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5397703560686414e-05,
      "loss": 4.3152,
      "step": 281088
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5389317613175894e-05,
      "loss": 4.3196,
      "step": 281600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5380931665665374e-05,
      "loss": 4.3021,
      "step": 282112
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5372545718154854e-05,
      "loss": 4.3186,
      "step": 282624
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.536415977064434e-05,
      "loss": 4.2845,
      "step": 283136
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.535577382313382e-05,
      "loss": 4.292,
      "step": 283648
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.53473878756233e-05,
      "loss": 4.3014,
      "step": 284160
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.533900192811278e-05,
      "loss": 4.3046,
      "step": 284672
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.533061598060226e-05,
      "loss": 4.3121,
      "step": 285184
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.532224641189547e-05,
      "loss": 4.2942,
      "step": 285696
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.531386046438495e-05,
      "loss": 4.2948,
      "step": 286208
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.530547451687443e-05,
      "loss": 4.3008,
      "step": 286720
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.529708856936391e-05,
      "loss": 4.3112,
      "step": 287232
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.528870262185339e-05,
      "loss": 4.3074,
      "step": 287744
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.528031667434287e-05,
      "loss": 4.3069,
      "step": 288256
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.527193072683234e-05,
      "loss": 4.2959,
      "step": 288768
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.526354477932182e-05,
      "loss": 4.2976,
      "step": 289280
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.525517521061504e-05,
      "loss": 4.3112,
      "step": 289792
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5246805641908255e-05,
      "loss": 4.2964,
      "step": 290304
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5238419694397735e-05,
      "loss": 4.2952,
      "step": 290816
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5230033746887215e-05,
      "loss": 4.2996,
      "step": 291328
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5221647799376694e-05,
      "loss": 4.2857,
      "step": 291840
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.521326185186617e-05,
      "loss": 4.2995,
      "step": 292352
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5204892283159384e-05,
      "loss": 4.2975,
      "step": 292864
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5196506335648863e-05,
      "loss": 4.308,
      "step": 293376
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5188120388138343e-05,
      "loss": 4.2858,
      "step": 293888
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5179734440627817e-05,
      "loss": 4.2902,
      "step": 294400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5171348493117297e-05,
      "loss": 4.3105,
      "step": 294912
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.516297892441051e-05,
      "loss": 4.2899,
      "step": 295424
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.515459297689999e-05,
      "loss": 4.301,
      "step": 295936
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.514620702938947e-05,
      "loss": 4.2964,
      "step": 296448
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.513782108187895e-05,
      "loss": 4.3076,
      "step": 296960
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.512943513436843e-05,
      "loss": 4.2978,
      "step": 297472
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.512106556566164e-05,
      "loss": 4.2899,
      "step": 297984
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.511267961815112e-05,
      "loss": 4.2849,
      "step": 298496
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.51042936706406e-05,
      "loss": 4.2817,
      "step": 299008
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.509590772313008e-05,
      "loss": 4.2937,
      "step": 299520
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.508752177561956e-05,
      "loss": 4.3045,
      "step": 300032
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.507915220691277e-05,
      "loss": 4.2951,
      "step": 300544
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.507076625940225e-05,
      "loss": 4.2965,
      "step": 301056
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.506238031189173e-05,
      "loss": 4.2861,
      "step": 301568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.505399436438121e-05,
      "loss": 4.2868,
      "step": 302080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.50456084168707e-05,
      "loss": 4.2778,
      "step": 302592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.503722246936018e-05,
      "loss": 4.2948,
      "step": 303104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5028852900653386e-05,
      "loss": 4.2961,
      "step": 303616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5020466953142866e-05,
      "loss": 4.2997,
      "step": 304128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5012081005632346e-05,
      "loss": 4.2811,
      "step": 304640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5003695058121826e-05,
      "loss": 4.2801,
      "step": 305152
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.2668843269348145,
      "eval_runtime": 552.3769,
      "eval_samples_per_second": 690.816,
      "eval_steps_per_second": 21.589,
      "step": 305276
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4995309110611306e-05,
      "loss": 4.2939,
      "step": 305664
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4986939541904515e-05,
      "loss": 4.2912,
      "step": 306176
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4978553594393995e-05,
      "loss": 4.2805,
      "step": 306688
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4970167646883475e-05,
      "loss": 4.2807,
      "step": 307200
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4961781699372955e-05,
      "loss": 4.2817,
      "step": 307712
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4953395751862435e-05,
      "loss": 4.2675,
      "step": 308224
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4945009804351915e-05,
      "loss": 4.2752,
      "step": 308736
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4936623856841395e-05,
      "loss": 4.2789,
      "step": 309248
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.492823790933088e-05,
      "loss": 4.2784,
      "step": 309760
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.491988471942782e-05,
      "loss": 4.2846,
      "step": 310272
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.49114987719173e-05,
      "loss": 4.2869,
      "step": 310784
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.490311282440678e-05,
      "loss": 4.2808,
      "step": 311296
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.489472687689626e-05,
      "loss": 4.2796,
      "step": 311808
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.488635730818947e-05,
      "loss": 4.2831,
      "step": 312320
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.487797136067895e-05,
      "loss": 4.2765,
      "step": 312832
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.486958541316843e-05,
      "loss": 4.2713,
      "step": 313344
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.486119946565791e-05,
      "loss": 4.2683,
      "step": 313856
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.485281351814739e-05,
      "loss": 4.2802,
      "step": 314368
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.484442757063687e-05,
      "loss": 4.2767,
      "step": 314880
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.483604162312635e-05,
      "loss": 4.287,
      "step": 315392
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.482765567561583e-05,
      "loss": 4.2717,
      "step": 315904
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.481926972810531e-05,
      "loss": 4.2833,
      "step": 316416
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4810900159398524e-05,
      "loss": 4.2768,
      "step": 316928
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4802514211888004e-05,
      "loss": 4.2754,
      "step": 317440
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.479412826437748e-05,
      "loss": 4.2737,
      "step": 317952
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.478574231686696e-05,
      "loss": 4.2717,
      "step": 318464
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.477735636935644e-05,
      "loss": 4.2797,
      "step": 318976
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.476897042184592e-05,
      "loss": 4.2626,
      "step": 319488
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.47605844743354e-05,
      "loss": 4.2796,
      "step": 320000
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.475219852682488e-05,
      "loss": 4.266,
      "step": 320512
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4743828958118086e-05,
      "loss": 4.2644,
      "step": 321024
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.473544301060757e-05,
      "loss": 4.2728,
      "step": 321536
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.472705706309705e-05,
      "loss": 4.2765,
      "step": 322048
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.471867111558653e-05,
      "loss": 4.2683,
      "step": 322560
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.471030154687974e-05,
      "loss": 4.2648,
      "step": 323072
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.470193197817295e-05,
      "loss": 4.2694,
      "step": 323584
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.469354603066243e-05,
      "loss": 4.2718,
      "step": 324096
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.468516008315191e-05,
      "loss": 4.2832,
      "step": 324608
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.467677413564139e-05,
      "loss": 4.267,
      "step": 325120
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.466838818813087e-05,
      "loss": 4.2587,
      "step": 325632
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.466000224062035e-05,
      "loss": 4.2575,
      "step": 326144
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.465161629310983e-05,
      "loss": 4.2557,
      "step": 326656
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.464323034559931e-05,
      "loss": 4.2711,
      "step": 327168
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4634860776892526e-05,
      "loss": 4.2594,
      "step": 327680
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4626474829382006e-05,
      "loss": 4.2705,
      "step": 328192
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4618088881871486e-05,
      "loss": 4.2665,
      "step": 328704
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4609702934360966e-05,
      "loss": 4.2618,
      "step": 329216
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4601316986850446e-05,
      "loss": 4.2723,
      "step": 329728
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4592947418143655e-05,
      "loss": 4.2515,
      "step": 330240
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4584561470633135e-05,
      "loss": 4.2455,
      "step": 330752
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4576175523122615e-05,
      "loss": 4.2568,
      "step": 331264
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4567789575612095e-05,
      "loss": 4.2652,
      "step": 331776
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4559420006905304e-05,
      "loss": 4.2446,
      "step": 332288
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4551034059394784e-05,
      "loss": 4.2693,
      "step": 332800
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.454266449068799e-05,
      "loss": 4.2493,
      "step": 333312
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.453429492198121e-05,
      "loss": 4.245,
      "step": 333824
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.452590897447069e-05,
      "loss": 4.2511,
      "step": 334336
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.451752302696017e-05,
      "loss": 4.2444,
      "step": 334848
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.450913707944965e-05,
      "loss": 4.2635,
      "step": 335360
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.450075113193913e-05,
      "loss": 4.2543,
      "step": 335872
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.449236518442861e-05,
      "loss": 4.2376,
      "step": 336384
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.448397923691809e-05,
      "loss": 4.2531,
      "step": 336896
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.447559328940757e-05,
      "loss": 4.2463,
      "step": 337408
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.446720734189705e-05,
      "loss": 4.2572,
      "step": 337920
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.445883777319026e-05,
      "loss": 4.2415,
      "step": 338432
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.445045182567974e-05,
      "loss": 4.2536,
      "step": 338944
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.444206587816922e-05,
      "loss": 4.2358,
      "step": 339456
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.44336799306587e-05,
      "loss": 4.2546,
      "step": 339968
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.442529398314818e-05,
      "loss": 4.2435,
      "step": 340480
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.441690803563766e-05,
      "loss": 4.2444,
      "step": 340992
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.440852208812714e-05,
      "loss": 4.2534,
      "step": 341504
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.440013614061662e-05,
      "loss": 4.2463,
      "step": 342016
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4391766571909834e-05,
      "loss": 4.2555,
      "step": 342528
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4383380624399314e-05,
      "loss": 4.2638,
      "step": 343040
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.437499467688879e-05,
      "loss": 4.2626,
      "step": 343552
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4366625108182e-05,
      "loss": 4.2468,
      "step": 344064
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.435823916067148e-05,
      "loss": 4.2424,
      "step": 344576
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4349853213160956e-05,
      "loss": 4.2357,
      "step": 345088
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4341467265650436e-05,
      "loss": 4.249,
      "step": 345600
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4333081318139916e-05,
      "loss": 4.2633,
      "step": 346112
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.432471174943313e-05,
      "loss": 4.2518,
      "step": 346624
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.431632580192261e-05,
      "loss": 4.2416,
      "step": 347136
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.430795623321583e-05,
      "loss": 4.2285,
      "step": 347648
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.429957028570531e-05,
      "loss": 4.2545,
      "step": 348160
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.429118433819479e-05,
      "loss": 4.2301,
      "step": 348672
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4282814769487996e-05,
      "loss": 4.2406,
      "step": 349184
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4274428821977476e-05,
      "loss": 4.2422,
      "step": 349696
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4266042874466956e-05,
      "loss": 4.2467,
      "step": 350208
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.425765692695643e-05,
      "loss": 4.2461,
      "step": 350720
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.424927097944591e-05,
      "loss": 4.2505,
      "step": 351232
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.424088503193539e-05,
      "loss": 4.2314,
      "step": 351744
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.423249908442487e-05,
      "loss": 4.2441,
      "step": 352256
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.422411313691435e-05,
      "loss": 4.2405,
      "step": 352768
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4215727189403836e-05,
      "loss": 4.2343,
      "step": 353280
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4207341241893316e-05,
      "loss": 4.2297,
      "step": 353792
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4198955294382796e-05,
      "loss": 4.2526,
      "step": 354304
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4190569346872276e-05,
      "loss": 4.2454,
      "step": 354816
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4182183399361756e-05,
      "loss": 4.244,
      "step": 355328
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4173797451851236e-05,
      "loss": 4.2359,
      "step": 355840
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4165411504340716e-05,
      "loss": 4.2424,
      "step": 356352
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4157025556830196e-05,
      "loss": 4.2304,
      "step": 356864
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4148655988123405e-05,
      "loss": 4.2452,
      "step": 357376
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4140270040612885e-05,
      "loss": 4.2448,
      "step": 357888
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4131900471906094e-05,
      "loss": 4.231,
      "step": 358400
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4123514524395574e-05,
      "loss": 4.2478,
      "step": 358912
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4115128576885054e-05,
      "loss": 4.2148,
      "step": 359424
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4106742629374534e-05,
      "loss": 4.2219,
      "step": 359936
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.409835668186402e-05,
      "loss": 4.229,
      "step": 360448
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.40899707343535e-05,
      "loss": 4.2327,
      "step": 360960
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4081584786842974e-05,
      "loss": 4.2422,
      "step": 361472
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.407321521813619e-05,
      "loss": 4.2302,
      "step": 361984
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.406482927062567e-05,
      "loss": 4.2193,
      "step": 362496
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.405644332311514e-05,
      "loss": 4.2283,
      "step": 363008
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.404805737560462e-05,
      "loss": 4.2396,
      "step": 363520
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.403968780689784e-05,
      "loss": 4.2368,
      "step": 364032
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.403130185938732e-05,
      "loss": 4.2402,
      "step": 364544
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.402291591187679e-05,
      "loss": 4.2264,
      "step": 365056
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.401452996436627e-05,
      "loss": 4.2252,
      "step": 365568
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.400614401685576e-05,
      "loss": 4.2421,
      "step": 366080
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.399777444814897e-05,
      "loss": 4.2293,
      "step": 366592
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.398938850063845e-05,
      "loss": 4.2244,
      "step": 367104
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.398100255312793e-05,
      "loss": 4.2292,
      "step": 367616
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.397261660561741e-05,
      "loss": 4.2178,
      "step": 368128
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.396423065810689e-05,
      "loss": 4.2304,
      "step": 368640
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.395584471059637e-05,
      "loss": 4.2314,
      "step": 369152
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.394745876308585e-05,
      "loss": 4.2375,
      "step": 369664
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3939089194379056e-05,
      "loss": 4.2181,
      "step": 370176
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3930703246868536e-05,
      "loss": 4.2235,
      "step": 370688
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3922317299358016e-05,
      "loss": 4.2384,
      "step": 371200
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3913931351847496e-05,
      "loss": 4.2273,
      "step": 371712
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3905545404336976e-05,
      "loss": 4.2315,
      "step": 372224
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3897159456826456e-05,
      "loss": 4.2322,
      "step": 372736
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3888773509315936e-05,
      "loss": 4.2368,
      "step": 373248
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.388038756180542e-05,
      "loss": 4.235,
      "step": 373760
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.387201799309863e-05,
      "loss": 4.2204,
      "step": 374272
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.386363204558811e-05,
      "loss": 4.2184,
      "step": 374784
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.385524609807759e-05,
      "loss": 4.2202,
      "step": 375296
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.384686015056707e-05,
      "loss": 4.2283,
      "step": 375808
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.383847420305655e-05,
      "loss": 4.2312,
      "step": 376320
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.383010463434976e-05,
      "loss": 4.2327,
      "step": 376832
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.382173506564297e-05,
      "loss": 4.2304,
      "step": 377344
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.381334911813245e-05,
      "loss": 4.2221,
      "step": 377856
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.380496317062193e-05,
      "loss": 4.2214,
      "step": 378368
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.379657722311141e-05,
      "loss": 4.212,
      "step": 378880
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.378819127560089e-05,
      "loss": 4.2287,
      "step": 379392
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3779805328090376e-05,
      "loss": 4.2309,
      "step": 379904
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3771419380579856e-05,
      "loss": 4.2346,
      "step": 380416
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3763033433069336e-05,
      "loss": 4.2136,
      "step": 380928
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.3754663864362545e-05,
      "loss": 4.216,
      "step": 381440
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.209681034088135,
      "eval_runtime": 546.9996,
      "eval_samples_per_second": 697.607,
      "eval_steps_per_second": 21.801,
      "step": 381595
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3746277916852025e-05,
      "loss": 4.2385,
      "step": 381952
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3737891969341505e-05,
      "loss": 4.223,
      "step": 382464
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.372950602183098e-05,
      "loss": 4.2188,
      "step": 382976
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.372112007432046e-05,
      "loss": 4.2132,
      "step": 383488
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.371273412680994e-05,
      "loss": 4.2169,
      "step": 384000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.370434817929942e-05,
      "loss": 4.206,
      "step": 384512
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.36959622317889e-05,
      "loss": 4.2118,
      "step": 385024
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.368757628427838e-05,
      "loss": 4.21,
      "step": 385536
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.367919033676786e-05,
      "loss": 4.2146,
      "step": 386048
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3670804389257345e-05,
      "loss": 4.2242,
      "step": 386560
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3662418441746825e-05,
      "loss": 4.2179,
      "step": 387072
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3654032494236305e-05,
      "loss": 4.2164,
      "step": 387584
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3645646546725785e-05,
      "loss": 4.2206,
      "step": 388096
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3637260599215265e-05,
      "loss": 4.2192,
      "step": 388608
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3628874651704745e-05,
      "loss": 4.2126,
      "step": 389120
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3620488704194225e-05,
      "loss": 4.2078,
      "step": 389632
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3612119135487434e-05,
      "loss": 4.2057,
      "step": 390144
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3603733187976914e-05,
      "loss": 4.2136,
      "step": 390656
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3595347240466394e-05,
      "loss": 4.2142,
      "step": 391168
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.358696129295587e-05,
      "loss": 4.2238,
      "step": 391680
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.357857534544535e-05,
      "loss": 4.2102,
      "step": 392192
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.357018939793483e-05,
      "loss": 4.2208,
      "step": 392704
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.356180345042431e-05,
      "loss": 4.2169,
      "step": 393216
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.355341750291379e-05,
      "loss": 4.2107,
      "step": 393728
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.3545047934207e-05,
      "loss": 4.2116,
      "step": 394240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.353666198669648e-05,
      "loss": 4.2096,
      "step": 394752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.352827603918596e-05,
      "loss": 4.2189,
      "step": 395264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.351989009167544e-05,
      "loss": 4.1993,
      "step": 395776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.351152052296865e-05,
      "loss": 4.2214,
      "step": 396288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.350313457545813e-05,
      "loss": 4.2033,
      "step": 396800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.349474862794761e-05,
      "loss": 4.1994,
      "step": 397312
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.348636268043709e-05,
      "loss": 4.2146,
      "step": 397824
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.34779931117303e-05,
      "loss": 4.2114,
      "step": 398336
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3469623543023516e-05,
      "loss": 4.2141,
      "step": 398848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3461237595512996e-05,
      "loss": 4.2059,
      "step": 399360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3452851648002476e-05,
      "loss": 4.2053,
      "step": 399872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.344448207929569e-05,
      "loss": 4.2109,
      "step": 400384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3436096131785165e-05,
      "loss": 4.2248,
      "step": 400896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3427710184274645e-05,
      "loss": 4.2069,
      "step": 401408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3419324236764125e-05,
      "loss": 4.1988,
      "step": 401920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3410938289253605e-05,
      "loss": 4.197,
      "step": 402432
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3402552341743085e-05,
      "loss": 4.1953,
      "step": 402944
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3394166394232565e-05,
      "loss": 4.2089,
      "step": 403456
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3385780446722045e-05,
      "loss": 4.2018,
      "step": 403968
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3377394499211525e-05,
      "loss": 4.2077,
      "step": 404480
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3369008551701005e-05,
      "loss": 4.2098,
      "step": 404992
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3360622604190485e-05,
      "loss": 4.2019,
      "step": 405504
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.33522530354837e-05,
      "loss": 4.2146,
      "step": 406016
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.334386708797318e-05,
      "loss": 4.1925,
      "step": 406528
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.333549751926639e-05,
      "loss": 4.1841,
      "step": 407040
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.33271279505596e-05,
      "loss": 4.2021,
      "step": 407552
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.331874200304908e-05,
      "loss": 4.1995,
      "step": 408064
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.331035605553856e-05,
      "loss": 4.191,
      "step": 408576
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.330197010802804e-05,
      "loss": 4.2095,
      "step": 409088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.329360053932125e-05,
      "loss": 4.1891,
      "step": 409600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.328521459181073e-05,
      "loss": 4.1906,
      "step": 410112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.327682864430021e-05,
      "loss": 4.1929,
      "step": 410624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.326844269678969e-05,
      "loss": 4.1879,
      "step": 411136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.326005674927917e-05,
      "loss": 4.2034,
      "step": 411648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3251670801768654e-05,
      "loss": 4.1946,
      "step": 412160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3243284854258134e-05,
      "loss": 4.1888,
      "step": 412672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3234898906747614e-05,
      "loss": 4.1914,
      "step": 413184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3226512959237094e-05,
      "loss": 4.185,
      "step": 413696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3218127011726574e-05,
      "loss": 4.2034,
      "step": 414208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3209741064216054e-05,
      "loss": 4.187,
      "step": 414720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.320135511670553e-05,
      "loss": 4.201,
      "step": 415232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.319298554799874e-05,
      "loss": 4.1758,
      "step": 415744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.318459960048822e-05,
      "loss": 4.1942,
      "step": 416256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.31762136529777e-05,
      "loss": 4.1887,
      "step": 416768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3167827705467176e-05,
      "loss": 4.1864,
      "step": 417280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.315945813676039e-05,
      "loss": 4.1974,
      "step": 417792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.315108856805361e-05,
      "loss": 4.1929,
      "step": 418304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.314270262054309e-05,
      "loss": 4.1956,
      "step": 418816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.313431667303257e-05,
      "loss": 4.2061,
      "step": 419328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.312593072552205e-05,
      "loss": 4.2069,
      "step": 419840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.311756115681526e-05,
      "loss": 4.1948,
      "step": 420352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.310917520930474e-05,
      "loss": 4.1879,
      "step": 420864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.310078926179422e-05,
      "loss": 4.1817,
      "step": 421376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.30924033142837e-05,
      "loss": 4.192,
      "step": 421888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.308401736677318e-05,
      "loss": 4.2048,
      "step": 422400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.307563141926265e-05,
      "loss": 4.196,
      "step": 422912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.306724547175213e-05,
      "loss": 4.1901,
      "step": 423424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.305885952424161e-05,
      "loss": 4.1738,
      "step": 423936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3050489955534826e-05,
      "loss": 4.2003,
      "step": 424448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3042104008024306e-05,
      "loss": 4.1731,
      "step": 424960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3033718060513786e-05,
      "loss": 4.1871,
      "step": 425472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3025332113003266e-05,
      "loss": 4.1884,
      "step": 425984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3016946165492746e-05,
      "loss": 4.1923,
      "step": 426496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.3008560217982226e-05,
      "loss": 4.1904,
      "step": 427008
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3000174270471705e-05,
      "loss": 4.1993,
      "step": 427520
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2991788322961185e-05,
      "loss": 4.1745,
      "step": 428032
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2983418754254395e-05,
      "loss": 4.1861,
      "step": 428544
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2975032806743874e-05,
      "loss": 4.1902,
      "step": 429056
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2966646859233354e-05,
      "loss": 4.1805,
      "step": 429568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2958260911722834e-05,
      "loss": 4.1762,
      "step": 430080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2949891343016043e-05,
      "loss": 4.1959,
      "step": 430592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.294152177430926e-05,
      "loss": 4.19,
      "step": 431104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.293313582679874e-05,
      "loss": 4.197,
      "step": 431616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.292474987928822e-05,
      "loss": 4.1811,
      "step": 432128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.29163639317777e-05,
      "loss": 4.188,
      "step": 432640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.290797798426718e-05,
      "loss": 4.1771,
      "step": 433152
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.289960841556039e-05,
      "loss": 4.1914,
      "step": 433664
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.289122246804987e-05,
      "loss": 4.1946,
      "step": 434176
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.288283652053935e-05,
      "loss": 4.178,
      "step": 434688
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.287445057302883e-05,
      "loss": 4.1948,
      "step": 435200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.286606462551831e-05,
      "loss": 4.1623,
      "step": 435712
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.285767867800779e-05,
      "loss": 4.1725,
      "step": 436224
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.284929273049727e-05,
      "loss": 4.1719,
      "step": 436736
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.284090678298675e-05,
      "loss": 4.1812,
      "step": 437248
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.283252083547623e-05,
      "loss": 4.1905,
      "step": 437760
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.282413488796571e-05,
      "loss": 4.1836,
      "step": 438272
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.281574894045519e-05,
      "loss": 4.1593,
      "step": 438784
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.280736299294467e-05,
      "loss": 4.1801,
      "step": 439296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2798993424237884e-05,
      "loss": 4.1855,
      "step": 439808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2790607476727364e-05,
      "loss": 4.1882,
      "step": 440320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.278222152921684e-05,
      "loss": 4.1846,
      "step": 440832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.277383558170632e-05,
      "loss": 4.1753,
      "step": 441344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.276546601299953e-05,
      "loss": 4.1739,
      "step": 441856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.275709644429274e-05,
      "loss": 4.1929,
      "step": 442368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.274871049678222e-05,
      "loss": 4.1775,
      "step": 442880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.27403245492717e-05,
      "loss": 4.1752,
      "step": 443392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.273193860176118e-05,
      "loss": 4.1727,
      "step": 443904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.27235690330544e-05,
      "loss": 4.1699,
      "step": 444416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.271518308554388e-05,
      "loss": 4.1783,
      "step": 444928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.270679713803336e-05,
      "loss": 4.1811,
      "step": 445440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.269841119052284e-05,
      "loss": 4.1865,
      "step": 445952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.269002524301231e-05,
      "loss": 4.1705,
      "step": 446464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.268163929550179e-05,
      "loss": 4.1706,
      "step": 446976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.267325334799127e-05,
      "loss": 4.1865,
      "step": 447488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.266486740048075e-05,
      "loss": 4.1726,
      "step": 448000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.265649783177396e-05,
      "loss": 4.1804,
      "step": 448512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.264811188426344e-05,
      "loss": 4.1829,
      "step": 449024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.263972593675292e-05,
      "loss": 4.1881,
      "step": 449536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.26313399892424e-05,
      "loss": 4.1844,
      "step": 450048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2622970420535615e-05,
      "loss": 4.1725,
      "step": 450560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2614584473025095e-05,
      "loss": 4.1635,
      "step": 451072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2606198525514575e-05,
      "loss": 4.1709,
      "step": 451584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2597812578004055e-05,
      "loss": 4.1769,
      "step": 452096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2589426630493535e-05,
      "loss": 4.1859,
      "step": 452608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2581040682983015e-05,
      "loss": 4.1807,
      "step": 453120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2572654735472495e-05,
      "loss": 4.1762,
      "step": 453632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2564268787961975e-05,
      "loss": 4.1764,
      "step": 454144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2555899219255184e-05,
      "loss": 4.1747,
      "step": 454656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.2547513271744664e-05,
      "loss": 4.1639,
      "step": 455168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.253914370303787e-05,
      "loss": 4.1714,
      "step": 455680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.253075775552735e-05,
      "loss": 4.1847,
      "step": 456192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.252237180801684e-05,
      "loss": 4.1864,
      "step": 456704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.251398586050632e-05,
      "loss": 4.1653,
      "step": 457216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.25055999129958e-05,
      "loss": 4.1643,
      "step": 457728
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.168251037597656,
      "eval_runtime": 545.9187,
      "eval_samples_per_second": 698.989,
      "eval_steps_per_second": 21.844,
      "step": 457914
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.249721396548528e-05,
      "loss": 4.1915,
      "step": 458240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.248882801797476e-05,
      "loss": 4.1737,
      "step": 458752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.248044207046424e-05,
      "loss": 4.1656,
      "step": 459264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.247205612295372e-05,
      "loss": 4.1677,
      "step": 459776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.24636701754432e-05,
      "loss": 4.1644,
      "step": 460288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.245528422793267e-05,
      "loss": 4.1587,
      "step": 460800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.244689828042215e-05,
      "loss": 4.16,
      "step": 461312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.243851233291163e-05,
      "loss": 4.1653,
      "step": 461824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.243012638540111e-05,
      "loss": 4.1674,
      "step": 462336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.242174043789059e-05,
      "loss": 4.1727,
      "step": 462848
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.241335449038007e-05,
      "loss": 4.1712,
      "step": 463360
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.240496854286955e-05,
      "loss": 4.1637,
      "step": 463872
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.239658259535903e-05,
      "loss": 4.1774,
      "step": 464384
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.238819664784852e-05,
      "loss": 4.1672,
      "step": 464896
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.2379810700338e-05,
      "loss": 4.1677,
      "step": 465408
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.237142475282748e-05,
      "loss": 4.1555,
      "step": 465920
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.236303880531696e-05,
      "loss": 4.1545,
      "step": 466432
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.235465285780644e-05,
      "loss": 4.1692,
      "step": 466944
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.234626691029591e-05,
      "loss": 4.1657,
      "step": 467456
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.233789734158913e-05,
      "loss": 4.1758,
      "step": 467968
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.232952777288234e-05,
      "loss": 4.1636,
      "step": 468480
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.232114182537182e-05,
      "loss": 4.1726,
      "step": 468992
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.23127558778613e-05,
      "loss": 4.1683,
      "step": 469504
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.230436993035078e-05,
      "loss": 4.1659,
      "step": 470016
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.229598398284026e-05,
      "loss": 4.1632,
      "step": 470528
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.228759803532974e-05,
      "loss": 4.1617,
      "step": 471040
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.227921208781922e-05,
      "loss": 4.1717,
      "step": 471552
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.22708261403087e-05,
      "loss": 4.1529,
      "step": 472064
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.226245657160191e-05,
      "loss": 4.1723,
      "step": 472576
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.2254070624091386e-05,
      "loss": 4.1603,
      "step": 473088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2245684676580866e-05,
      "loss": 4.1518,
      "step": 473600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2237298729070346e-05,
      "loss": 4.164,
      "step": 474112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.222894553916729e-05,
      "loss": 4.166,
      "step": 474624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.222055959165677e-05,
      "loss": 4.1698,
      "step": 475136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.221217364414625e-05,
      "loss": 4.157,
      "step": 475648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.220378769663573e-05,
      "loss": 4.1624,
      "step": 476160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.219540174912521e-05,
      "loss": 4.1648,
      "step": 476672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2187032180418426e-05,
      "loss": 4.1749,
      "step": 477184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2178646232907906e-05,
      "loss": 4.1629,
      "step": 477696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2170260285397386e-05,
      "loss": 4.1525,
      "step": 478208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2161890716690595e-05,
      "loss": 4.1505,
      "step": 478720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2153504769180075e-05,
      "loss": 4.1471,
      "step": 479232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2145118821669555e-05,
      "loss": 4.1671,
      "step": 479744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2136732874159035e-05,
      "loss": 4.1578,
      "step": 480256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.212834692664851e-05,
      "loss": 4.1582,
      "step": 480768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.211996097913799e-05,
      "loss": 4.1688,
      "step": 481280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.211157503162747e-05,
      "loss": 4.1538,
      "step": 481792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.210318908411695e-05,
      "loss": 4.1678,
      "step": 482304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.209480313660643e-05,
      "loss": 4.1497,
      "step": 482816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.208641718909591e-05,
      "loss": 4.1434,
      "step": 483328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2078031241585395e-05,
      "loss": 4.1556,
      "step": 483840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2069661672878604e-05,
      "loss": 4.1511,
      "step": 484352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2061275725368084e-05,
      "loss": 4.1497,
      "step": 484864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2052889777857564e-05,
      "loss": 4.1591,
      "step": 485376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2044503830347044e-05,
      "loss": 4.1508,
      "step": 485888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2036117882836524e-05,
      "loss": 4.1447,
      "step": 486400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2027731935326004e-05,
      "loss": 4.1453,
      "step": 486912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2019345987815484e-05,
      "loss": 4.145,
      "step": 487424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2010960040304964e-05,
      "loss": 4.158,
      "step": 487936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.200259047159817e-05,
      "loss": 4.1492,
      "step": 488448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.199420452408765e-05,
      "loss": 4.1435,
      "step": 488960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.198583495538086e-05,
      "loss": 4.1475,
      "step": 489472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.197744900787035e-05,
      "loss": 4.1347,
      "step": 489984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.196906306035983e-05,
      "loss": 4.163,
      "step": 490496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.196067711284931e-05,
      "loss": 4.1447,
      "step": 491008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.195229116533879e-05,
      "loss": 4.1518,
      "step": 491520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.194390521782827e-05,
      "loss": 4.1387,
      "step": 492032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.193551927031775e-05,
      "loss": 4.1444,
      "step": 492544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.192713332280722e-05,
      "loss": 4.1473,
      "step": 493056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.191876375410044e-05,
      "loss": 4.1427,
      "step": 493568
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.191037780658992e-05,
      "loss": 4.1548,
      "step": 494080
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.19019918590794e-05,
      "loss": 4.1472,
      "step": 494592
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.189360591156887e-05,
      "loss": 4.1538,
      "step": 495104
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1885236342862086e-05,
      "loss": 4.1619,
      "step": 495616
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.18768667741553e-05,
      "loss": 4.1607,
      "step": 496128
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.186848082664478e-05,
      "loss": 4.1534,
      "step": 496640
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.186009487913426e-05,
      "loss": 4.1463,
      "step": 497152
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.185170893162374e-05,
      "loss": 4.1398,
      "step": 497664
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.184332298411322e-05,
      "loss": 4.1479,
      "step": 498176
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.183495341540643e-05,
      "loss": 4.1587,
      "step": 498688
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.182656746789591e-05,
      "loss": 4.1544,
      "step": 499200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.181818152038539e-05,
      "loss": 4.1462,
      "step": 499712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.180979557287487e-05,
      "loss": 4.1299,
      "step": 500224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1801409625364344e-05,
      "loss": 4.1584,
      "step": 500736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1793023677853824e-05,
      "loss": 4.1272,
      "step": 501248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1784637730343304e-05,
      "loss": 4.1449,
      "step": 501760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.177626816163652e-05,
      "loss": 4.1471,
      "step": 502272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1767882214126e-05,
      "loss": 4.1442,
      "step": 502784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.175949626661548e-05,
      "loss": 4.1502,
      "step": 503296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.175111031910496e-05,
      "loss": 4.1562,
      "step": 503808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.174272437159444e-05,
      "loss": 4.13,
      "step": 504320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.173433842408392e-05,
      "loss": 4.145,
      "step": 504832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.17259524765734e-05,
      "loss": 4.1482,
      "step": 505344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.171758290786661e-05,
      "loss": 4.1383,
      "step": 505856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.170919696035609e-05,
      "loss": 4.1326,
      "step": 506368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.170081101284557e-05,
      "loss": 4.155,
      "step": 506880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.169242506533505e-05,
      "loss": 4.149,
      "step": 507392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.168403911782453e-05,
      "loss": 4.1573,
      "step": 507904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.167565317031401e-05,
      "loss": 4.1382,
      "step": 508416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.166726722280349e-05,
      "loss": 4.1469,
      "step": 508928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.165888127529297e-05,
      "loss": 4.1337,
      "step": 509440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1650511706586184e-05,
      "loss": 4.1492,
      "step": 509952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1642142137879393e-05,
      "loss": 4.1519,
      "step": 510464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1633756190368873e-05,
      "loss": 4.1422,
      "step": 510976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1625370242858353e-05,
      "loss": 4.1495,
      "step": 511488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.161698429534783e-05,
      "loss": 4.1246,
      "step": 512000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.160861472664104e-05,
      "loss": 4.1272,
      "step": 512512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.160022877913052e-05,
      "loss": 4.1262,
      "step": 513024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.159184283162e-05,
      "loss": 4.1401,
      "step": 513536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.158345688410948e-05,
      "loss": 4.1492,
      "step": 514048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.157507093659896e-05,
      "loss": 4.1468,
      "step": 514560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.156668498908844e-05,
      "loss": 4.1191,
      "step": 515072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.155829904157792e-05,
      "loss": 4.1368,
      "step": 515584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.154992947287114e-05,
      "loss": 4.1454,
      "step": 516096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.154154352536062e-05,
      "loss": 4.1482,
      "step": 516608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.15331575778501e-05,
      "loss": 4.1443,
      "step": 517120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.152477163033958e-05,
      "loss": 4.1359,
      "step": 517632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.151638568282906e-05,
      "loss": 4.1354,
      "step": 518144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.150801611412227e-05,
      "loss": 4.1453,
      "step": 518656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.149963016661175e-05,
      "loss": 4.1396,
      "step": 519168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.149124421910123e-05,
      "loss": 4.1349,
      "step": 519680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.14828582715907e-05,
      "loss": 4.1299,
      "step": 520192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.147447232408018e-05,
      "loss": 4.1271,
      "step": 520704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1466102755373396e-05,
      "loss": 4.141,
      "step": 521216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1457716807862876e-05,
      "loss": 4.1404,
      "step": 521728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1449330860352356e-05,
      "loss": 4.1429,
      "step": 522240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1440944912841836e-05,
      "loss": 4.1325,
      "step": 522752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1432558965331316e-05,
      "loss": 4.1293,
      "step": 523264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1424173017820796e-05,
      "loss": 4.1472,
      "step": 523776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1415787070310276e-05,
      "loss": 4.1295,
      "step": 524288
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1407401122799756e-05,
      "loss": 4.1422,
      "step": 524800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.13990479328967e-05,
      "loss": 4.1462,
      "step": 525312
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1390661985386174e-05,
      "loss": 4.1444,
      "step": 525824
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1382276037875654e-05,
      "loss": 4.146,
      "step": 526336
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1373890090365134e-05,
      "loss": 4.136,
      "step": 526848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1365504142854614e-05,
      "loss": 4.1223,
      "step": 527360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1357118195344094e-05,
      "loss": 4.1308,
      "step": 527872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.134873224783358e-05,
      "loss": 4.135,
      "step": 528384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.134034630032306e-05,
      "loss": 4.1491,
      "step": 528896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.133197673161627e-05,
      "loss": 4.1376,
      "step": 529408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.132359078410575e-05,
      "loss": 4.1429,
      "step": 529920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.131520483659523e-05,
      "loss": 4.1342,
      "step": 530432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.130683526788844e-05,
      "loss": 4.135,
      "step": 530944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.129844932037792e-05,
      "loss": 4.1232,
      "step": 531456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.12900633728674e-05,
      "loss": 4.133,
      "step": 531968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.128167742535688e-05,
      "loss": 4.1456,
      "step": 532480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.127329147784636e-05,
      "loss": 4.1475,
      "step": 532992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.126490553033584e-05,
      "loss": 4.1225,
      "step": 533504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.125651958282532e-05,
      "loss": 4.1289,
      "step": 534016
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.136957168579102,
      "eval_runtime": 548.4137,
      "eval_samples_per_second": 695.809,
      "eval_steps_per_second": 21.745,
      "step": 534233
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.12481336353148e-05,
      "loss": 4.1533,
      "step": 534528
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.123974768780428e-05,
      "loss": 4.1362,
      "step": 535040
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.123136174029376e-05,
      "loss": 4.1283,
      "step": 535552
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1222975792783245e-05,
      "loss": 4.1313,
      "step": 536064
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.121458984527272e-05,
      "loss": 4.1234,
      "step": 536576
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.12062038977622e-05,
      "loss": 4.1248,
      "step": 537088
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.119781795025168e-05,
      "loss": 4.1154,
      "step": 537600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.118943200274116e-05,
      "loss": 4.1302,
      "step": 538112
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.118104605523064e-05,
      "loss": 4.1264,
      "step": 538624
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.117266010772012e-05,
      "loss": 4.1354,
      "step": 539136
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.11642741602096e-05,
      "loss": 4.1317,
      "step": 539648
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.115588821269908e-05,
      "loss": 4.128,
      "step": 540160
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.114750226518856e-05,
      "loss": 4.1389,
      "step": 540672
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.113911631767804e-05,
      "loss": 4.1253,
      "step": 541184
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.113073037016752e-05,
      "loss": 4.136,
      "step": 541696
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1122344422657e-05,
      "loss": 4.1154,
      "step": 542208
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.111395847514648e-05,
      "loss": 4.1198,
      "step": 542720
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.110557252763596e-05,
      "loss": 4.1309,
      "step": 543232
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.109718658012544e-05,
      "loss": 4.1252,
      "step": 543744
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.108881701141865e-05,
      "loss": 4.1379,
      "step": 544256
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.108043106390813e-05,
      "loss": 4.1272,
      "step": 544768
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1072045116397606e-05,
      "loss": 4.1326,
      "step": 545280
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1063659168887086e-05,
      "loss": 4.1322,
      "step": 545792
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1055273221376566e-05,
      "loss": 4.1234,
      "step": 546304
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1046887273866046e-05,
      "loss": 4.1314,
      "step": 546816
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1038517705159255e-05,
      "loss": 4.1192,
      "step": 547328
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1030131757648735e-05,
      "loss": 4.1348,
      "step": 547840
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1021745810138215e-05,
      "loss": 4.1219,
      "step": 548352
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1013359862627695e-05,
      "loss": 4.1323,
      "step": 548864
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.100499029392091e-05,
      "loss": 4.1184,
      "step": 549376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.099660434641039e-05,
      "loss": 4.1179,
      "step": 549888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.098821839889987e-05,
      "loss": 4.1276,
      "step": 550400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.097983245138935e-05,
      "loss": 4.1268,
      "step": 550912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.097144650387883e-05,
      "loss": 4.1305,
      "step": 551424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.096306055636831e-05,
      "loss": 4.1263,
      "step": 551936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.095469098766152e-05,
      "loss": 4.1209,
      "step": 552448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0946305040151e-05,
      "loss": 4.1302,
      "step": 552960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.093791909264048e-05,
      "loss": 4.1349,
      "step": 553472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.092953314512996e-05,
      "loss": 4.1262,
      "step": 553984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.092114719761944e-05,
      "loss": 4.117,
      "step": 554496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.091276125010892e-05,
      "loss": 4.1124,
      "step": 555008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.09043753025984e-05,
      "loss": 4.1139,
      "step": 555520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.089598935508788e-05,
      "loss": 4.1296,
      "step": 556032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0887619786381095e-05,
      "loss": 4.1217,
      "step": 556544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0879233838870575e-05,
      "loss": 4.1165,
      "step": 557056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0870847891360055e-05,
      "loss": 4.1354,
      "step": 557568
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0862478322653264e-05,
      "loss": 4.1159,
      "step": 558080
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0854092375142744e-05,
      "loss": 4.1307,
      "step": 558592
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0845706427632224e-05,
      "loss": 4.114,
      "step": 559104
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0837336858925433e-05,
      "loss": 4.1062,
      "step": 559616
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0828950911414913e-05,
      "loss": 4.1169,
      "step": 560128
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.082056496390439e-05,
      "loss": 4.1202,
      "step": 560640
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.081217901639387e-05,
      "loss": 4.1107,
      "step": 561152
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.080379306888335e-05,
      "loss": 4.1247,
      "step": 561664
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.079540712137283e-05,
      "loss": 4.1162,
      "step": 562176
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.078702117386231e-05,
      "loss": 4.1045,
      "step": 562688
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.077863522635179e-05,
      "loss": 4.1161,
      "step": 563200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.077024927884127e-05,
      "loss": 4.104,
      "step": 563712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.076186333133075e-05,
      "loss": 4.1203,
      "step": 564224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.075349376262397e-05,
      "loss": 4.1137,
      "step": 564736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.074510781511344e-05,
      "loss": 4.1112,
      "step": 565248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.073672186760292e-05,
      "loss": 4.1097,
      "step": 565760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.07283359200924e-05,
      "loss": 4.1013,
      "step": 566272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.071996635138561e-05,
      "loss": 4.1239,
      "step": 566784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.071158040387509e-05,
      "loss": 4.1065,
      "step": 567296
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.070319445636457e-05,
      "loss": 4.118,
      "step": 567808
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.069480850885405e-05,
      "loss": 4.1043,
      "step": 568320
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.068642256134354e-05,
      "loss": 4.1104,
      "step": 568832
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.067803661383302e-05,
      "loss": 4.1085,
      "step": 569344
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.06696506663225e-05,
      "loss": 4.1104,
      "step": 569856
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.066126471881198e-05,
      "loss": 4.1143,
      "step": 570368
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.065289515010519e-05,
      "loss": 4.1114,
      "step": 570880
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.064450920259467e-05,
      "loss": 4.1176,
      "step": 571392
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0636123255084147e-05,
      "loss": 4.1266,
      "step": 571904
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0627737307573627e-05,
      "loss": 4.1268,
      "step": 572416
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0619351360063106e-05,
      "loss": 4.1157,
      "step": 572928
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0610965412552586e-05,
      "loss": 4.1157,
      "step": 573440
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0602579465042066e-05,
      "loss": 4.0978,
      "step": 573952
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0594209896335276e-05,
      "loss": 4.1137,
      "step": 574464
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0585823948824755e-05,
      "loss": 4.1259,
      "step": 574976
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0577438001314235e-05,
      "loss": 4.1185,
      "step": 575488
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.056905205380372e-05,
      "loss": 4.1155,
      "step": 576000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.056068248509693e-05,
      "loss": 4.0954,
      "step": 576512
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.055229653758641e-05,
      "loss": 4.1194,
      "step": 577024
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.054391059007589e-05,
      "loss": 4.0984,
      "step": 577536
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.053552464256537e-05,
      "loss": 4.1091,
      "step": 578048
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.052713869505485e-05,
      "loss": 4.1077,
      "step": 578560
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0518752747544324e-05,
      "loss": 4.1133,
      "step": 579072
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0510366800033804e-05,
      "loss": 4.1158,
      "step": 579584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0501980852523284e-05,
      "loss": 4.1173,
      "step": 580096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.04936112838165e-05,
      "loss": 4.0997,
      "step": 580608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.048522533630597e-05,
      "loss": 4.1155,
      "step": 581120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.047685576759919e-05,
      "loss": 4.1084,
      "step": 581632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0468469820088676e-05,
      "loss": 4.1056,
      "step": 582144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0460083872578156e-05,
      "loss": 4.0968,
      "step": 582656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.045169792506763e-05,
      "loss": 4.1184,
      "step": 583168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.044331197755711e-05,
      "loss": 4.1134,
      "step": 583680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.043492603004659e-05,
      "loss": 4.1208,
      "step": 584192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.04265564613398e-05,
      "loss": 4.1092,
      "step": 584704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.041817051382928e-05,
      "loss": 4.1128,
      "step": 585216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.040978456631876e-05,
      "loss": 4.0978,
      "step": 585728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.040139861880824e-05,
      "loss": 4.1173,
      "step": 586240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.039302905010145e-05,
      "loss": 4.1139,
      "step": 586752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.038464310259093e-05,
      "loss": 4.1101,
      "step": 587264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0376257155080414e-05,
      "loss": 4.1123,
      "step": 587776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0367871207569894e-05,
      "loss": 4.0936,
      "step": 588288
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0359485260059374e-05,
      "loss": 4.0907,
      "step": 588800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0351099312548853e-05,
      "loss": 4.0955,
      "step": 589312
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0342713365038333e-05,
      "loss": 4.1042,
      "step": 589824
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.033432741752781e-05,
      "loss": 4.1173,
      "step": 590336
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.032594147001729e-05,
      "loss": 4.1106,
      "step": 590848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.031755552250677e-05,
      "loss": 4.0811,
      "step": 591360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.030918595379998e-05,
      "loss": 4.1085,
      "step": 591872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.030080000628946e-05,
      "loss": 4.1135,
      "step": 592384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.029241405877894e-05,
      "loss": 4.1098,
      "step": 592896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.028402811126842e-05,
      "loss": 4.1109,
      "step": 593408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.02756421637579e-05,
      "loss": 4.1061,
      "step": 593920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.026725621624738e-05,
      "loss": 4.0973,
      "step": 594432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.025888664754059e-05,
      "loss": 4.1116,
      "step": 594944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.025050070003008e-05,
      "loss": 4.1068,
      "step": 595456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.024211475251956e-05,
      "loss": 4.1014,
      "step": 595968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.023372880500904e-05,
      "loss": 4.0942,
      "step": 596480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.022534285749852e-05,
      "loss": 4.0982,
      "step": 596992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.021695690998799e-05,
      "loss": 4.1048,
      "step": 597504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.020857096247747e-05,
      "loss": 4.1052,
      "step": 598016
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.020018501496695e-05,
      "loss": 4.1135,
      "step": 598528
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.019181544626016e-05,
      "loss": 4.1002,
      "step": 599040
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.018342949874964e-05,
      "loss": 4.0959,
      "step": 599552
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0175059930042856e-05,
      "loss": 4.1146,
      "step": 600064
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0166673982532336e-05,
      "loss": 4.0948,
      "step": 600576
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0158288035021816e-05,
      "loss": 4.1111,
      "step": 601088
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0149902087511296e-05,
      "loss": 4.1123,
      "step": 601600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.014153251880451e-05,
      "loss": 4.1092,
      "step": 602112
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.013316295009772e-05,
      "loss": 4.1139,
      "step": 602624
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.01247770025872e-05,
      "loss": 4.1025,
      "step": 603136
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.011639105507668e-05,
      "loss": 4.0933,
      "step": 603648
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.010800510756616e-05,
      "loss": 4.0973,
      "step": 604160
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0099619160055634e-05,
      "loss": 4.106,
      "step": 604672
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0091233212545114e-05,
      "loss": 4.1108,
      "step": 605184
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0082847265034594e-05,
      "loss": 4.1101,
      "step": 605696
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0074461317524074e-05,
      "loss": 4.1053,
      "step": 606208
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0066075370013554e-05,
      "loss": 4.099,
      "step": 606720
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0057689422503034e-05,
      "loss": 4.1034,
      "step": 607232
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0049303474992513e-05,
      "loss": 4.089,
      "step": 607744
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0040917527482e-05,
      "loss": 4.103,
      "step": 608256
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.003254795877521e-05,
      "loss": 4.1111,
      "step": 608768
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.002417839006842e-05,
      "loss": 4.1156,
      "step": 609280
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.00157924425579e-05,
      "loss": 4.0885,
      "step": 609792
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.000740649504738e-05,
      "loss": 4.1016,
      "step": 610304
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.111624240875244,
      "eval_runtime": 536.8692,
      "eval_samples_per_second": 710.771,
      "eval_steps_per_second": 22.212,
      "step": 610552
    }
  ],
  "logging_steps": 512,
  "max_steps": 3052726,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 10,
  "total_flos": 2.519804656749205e+17,
  "trial_name": null,
  "trial_params": null
}