diff --git "a/checkpoint-1602720/trainer_state.json" "b/checkpoint-1602720/trainer_state.json"
new file mode 100644
--- /dev/null
+++ "b/checkpoint-1602720/trainer_state.json"
@@ -0,0 +1,18973 @@
+{
+  "best_metric": 3.997973680496216,
+  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/det-adj-noun/lstm/4/checkpoints/checkpoint-1602720",
+  "epoch": 0.025000606015738065,
+  "eval_steps": 10,
+  "global_step": 1602720,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.999998362119627e-05,
+      "loss": 10.8193,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.999161405248948e-05,
+      "loss": 7.5516,
+      "step": 512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.998322810497896e-05,
+      "loss": 7.0608,
+      "step": 1024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.997484215746844e-05,
+      "loss": 6.9965,
+      "step": 1536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.996645620995792e-05,
+      "loss": 6.9474,
+      "step": 2048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.99580702624474e-05,
+      "loss": 6.9067,
+      "step": 2560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.994968431493688e-05,
+      "loss": 6.7482,
+      "step": 3072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.994129836742636e-05,
+      "loss": 6.6243,
+      "step": 3584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.993291241991584e-05,
+      "loss": 6.5246,
+      "step": 4096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.992452647240532e-05,
+      "loss": 6.4571,
+      "step": 4608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.99161405248948e-05,
+      "loss": 6.4095,
+      "step": 5120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.990775457738428e-05,
+      "loss": 6.3562,
+      "step": 5632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.989936862987376e-05,
+      "loss": 6.2988,
+      "step": 6144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.989098268236324e-05,
+      "loss": 6.2415,
+      "step": 6656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.988261311365645e-05,
+      "loss": 6.1819,
+      "step": 7168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.987422716614593e-05,
+      "loss": 6.122,
+      "step": 7680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.986584121863541e-05,
+      "loss": 6.074,
+      "step": 8192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.985745527112489e-05,
+      "loss": 6.0331,
+      "step": 8704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.984906932361437e-05,
+      "loss": 5.9837,
+      "step": 9216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.984068337610385e-05,
+      "loss": 5.9423,
+      "step": 9728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.983229742859333e-05,
+      "loss": 5.907,
+      "step": 10240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.982391148108281e-05,
+      "loss": 5.86,
+      "step": 10752
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.981552553357229e-05,
+      "loss": 5.838,
+      "step": 11264
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.980713958606178e-05,
+      "loss": 5.7998,
+      "step": 11776
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.979875363855125e-05,
+      "loss": 5.7797,
+      "step": 12288
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.9790384069844466e-05,
+      "loss": 5.7431,
+      "step": 12800
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.9781998122333946e-05,
+      "loss": 5.7159,
+      "step": 13312
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.9773612174823426e-05,
+      "loss": 5.6918,
+      "step": 13824
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.97652262273129e-05,
+      "loss": 5.674,
+      "step": 14336
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.975684027980238e-05,
+      "loss": 5.6498,
+      "step": 14848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.974845433229186e-05,
+      "loss": 5.6279,
+      "step": 15360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.974008476358507e-05,
+      "loss": 5.617,
+      "step": 15872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.973169881607455e-05,
+      "loss": 5.58,
+      "step": 16384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.972331286856403e-05,
+      "loss": 5.573,
+      "step": 16896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9714926921053515e-05,
+      "loss": 5.5536,
+      "step": 17408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9706557352346724e-05,
+      "loss": 5.5374,
+      "step": 17920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9698171404836204e-05,
+      "loss": 5.5226,
+      "step": 18432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9689785457325684e-05,
+      "loss": 5.4817,
+      "step": 18944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9681399509815164e-05,
+      "loss": 5.4874,
+      "step": 19456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.967302994110837e-05,
+      "loss": 5.4534,
+      "step": 19968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.966464399359785e-05,
+      "loss": 5.4561,
+      "step": 20480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.965625804608733e-05,
+      "loss": 5.4345,
+      "step": 20992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.964787209857681e-05,
+      "loss": 5.4366,
+      "step": 21504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.963948615106629e-05,
+      "loss": 5.4068,
+      "step": 22016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.96311165823595e-05,
+      "loss": 5.3918,
+      "step": 22528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.962273063484898e-05,
+      "loss": 5.3965,
+      "step": 23040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.961434468733847e-05,
+      "loss": 5.3847,
+      "step": 23552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.960595873982795e-05,
+      "loss": 5.3722,
+      "step": 24064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.959758917112116e-05,
+      "loss": 5.3542,
+      "step": 24576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.958920322361064e-05,
+      "loss": 5.332,
+      "step": 25088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.958081727610012e-05,
+      "loss": 5.3432,
+      "step": 25600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.95724313285896e-05,
+      "loss": 5.3239,
+      "step": 26112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.956404538107908e-05,
+      "loss": 5.3225,
+      "step": 26624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.955565943356856e-05,
+      "loss": 5.2913,
+      "step": 27136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.954727348605804e-05,
+      "loss": 5.2958,
+      "step": 27648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.953888753854752e-05,
+      "loss": 5.273,
+      "step": 28160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9530501591037e-05,
+      "loss": 5.2939,
+      "step": 28672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.952211564352648e-05,
+      "loss": 5.258,
+      "step": 29184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9513746074819686e-05,
+      "loss": 5.2597,
+      "step": 29696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9505360127309166e-05,
+      "loss": 5.2452,
+      "step": 30208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9496974179798646e-05,
+      "loss": 5.2317,
+      "step": 30720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.948858823228813e-05,
+      "loss": 5.2339,
+      "step": 31232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.948020228477761e-05,
+      "loss": 5.2216,
+      "step": 31744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9471816337267086e-05,
+      "loss": 5.1945,
+      "step": 32256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9463430389756566e-05,
+      "loss": 5.2007,
+      "step": 32768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9455044442246046e-05,
+      "loss": 5.2104,
+      "step": 33280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9446658494735526e-05,
+      "loss": 5.1875,
+      "step": 33792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9438272547225006e-05,
+      "loss": 5.1765,
+      "step": 34304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9429902978518215e-05,
+      "loss": 5.159,
+      "step": 34816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9421517031007695e-05,
+      "loss": 5.1617,
+      "step": 35328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9413131083497175e-05,
+      "loss": 5.1777,
+      "step": 35840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9404745135986655e-05,
+      "loss": 5.1614,
+      "step": 36352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.939637556727987e-05,
+      "loss": 5.1471,
+      "step": 36864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.938798961976935e-05,
+      "loss": 5.15,
+      "step": 37376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.937960367225883e-05,
+      "loss": 5.1461,
+      "step": 37888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.937121772474831e-05,
+      "loss": 5.1278,
+      "step": 38400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.936283177723779e-05,
+      "loss": 5.1132,
+      "step": 38912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.935444582972727e-05,
+      "loss": 5.1184,
+      "step": 39424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.934605988221675e-05,
+      "loss": 5.0984,
+      "step": 39936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.933769031350996e-05,
+      "loss": 5.0998,
+      "step": 40448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.932930436599944e-05,
+      "loss": 5.1044,
+      "step": 40960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.932091841848892e-05,
+      "loss": 5.0902,
+      "step": 41472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.93125324709784e-05,
+      "loss": 5.0931,
+      "step": 41984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.930416290227161e-05,
+      "loss": 5.0813,
+      "step": 42496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9295793333564825e-05,
+      "loss": 5.0611,
+      "step": 43008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9287423764858034e-05,
+      "loss": 5.0642,
+      "step": 43520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9279037817347514e-05,
+      "loss": 5.0609,
+      "step": 44032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9270651869836994e-05,
+      "loss": 5.0672,
+      "step": 44544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9262265922326474e-05,
+      "loss": 5.0484,
+      "step": 45056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9253879974815954e-05,
+      "loss": 5.0472,
+      "step": 45568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9245494027305433e-05,
+      "loss": 5.0452,
+      "step": 46080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9237108079794913e-05,
+      "loss": 5.0372,
+      "step": 46592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.922872213228439e-05,
+      "loss": 5.0347,
+      "step": 47104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.922033618477387e-05,
+      "loss": 5.0237,
+      "step": 47616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.921195023726335e-05,
+      "loss": 5.0093,
+      "step": 48128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.920356428975283e-05,
+      "loss": 5.0178,
+      "step": 48640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.919517834224231e-05,
+      "loss": 5.0058,
+      "step": 49152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.918679239473179e-05,
+      "loss": 5.0017,
+      "step": 49664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.917840644722127e-05,
+      "loss": 4.9931,
+      "step": 50176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.917003687851449e-05,
+      "loss": 4.9923,
+      "step": 50688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.916165093100397e-05,
+      "loss": 4.9966,
+      "step": 51200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.915326498349345e-05,
+      "loss": 4.9846,
+      "step": 51712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.914487903598292e-05,
+      "loss": 4.9739,
+      "step": 52224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.91364930884724e-05,
+      "loss": 4.9696,
+      "step": 52736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.912810714096188e-05,
+      "loss": 4.9615,
+      "step": 53248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.911972119345136e-05,
+      "loss": 4.9481,
+      "step": 53760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.911135162474457e-05,
+      "loss": 4.9444,
+      "step": 54272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.910296567723405e-05,
+      "loss": 4.9483,
+      "step": 54784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.909457972972353e-05,
+      "loss": 4.9445,
+      "step": 55296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.908619378221301e-05,
+      "loss": 4.9323,
+      "step": 55808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.907780783470249e-05,
+      "loss": 4.9296,
+      "step": 56320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.906943826599571e-05,
+      "loss": 4.9301,
+      "step": 56832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.906105231848519e-05,
+      "loss": 4.9332,
+      "step": 57344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.905266637097467e-05,
+      "loss": 4.9281,
+      "step": 57856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9044280423464147e-05,
+      "loss": 4.9207,
+      "step": 58368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9035910854757356e-05,
+      "loss": 4.9198,
+      "step": 58880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9027541286050565e-05,
+      "loss": 4.9169,
+      "step": 59392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9019155338540045e-05,
+      "loss": 4.9064,
+      "step": 59904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9010769391029525e-05,
+      "loss": 4.8998,
+      "step": 60416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9002383443519005e-05,
+      "loss": 4.9036,
+      "step": 60928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8993997496008485e-05,
+      "loss": 4.9031,
+      "step": 61440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8985611548497965e-05,
+      "loss": 4.8806,
+      "step": 61952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8977225600987445e-05,
+      "loss": 4.8813,
+      "step": 62464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8968839653476924e-05,
+      "loss": 4.8789,
+      "step": 62976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.896047008477014e-05,
+      "loss": 4.871,
+      "step": 63488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.895208413725962e-05,
+      "loss": 4.8733,
+      "step": 64000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.89436981897491e-05,
+      "loss": 4.8763,
+      "step": 64512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.893531224223858e-05,
+      "loss": 4.8616,
+      "step": 65024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.892692629472806e-05,
+      "loss": 4.8619,
+      "step": 65536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.891854034721754e-05,
+      "loss": 4.8506,
+      "step": 66048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.891015439970702e-05,
+      "loss": 4.8671,
+      "step": 66560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.89017684521965e-05,
+      "loss": 4.8578,
+      "step": 67072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.889338250468598e-05,
+      "loss": 4.8493,
+      "step": 67584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.888501293597919e-05,
+      "loss": 4.8461,
+      "step": 68096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.887662698846867e-05,
+      "loss": 4.8456,
+      "step": 68608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.886824104095815e-05,
+      "loss": 4.8457,
+      "step": 69120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.885985509344763e-05,
+      "loss": 4.8345,
+      "step": 69632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8851485524740845e-05,
+      "loss": 4.8332,
+      "step": 70144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8843099577230325e-05,
+      "loss": 4.8331,
+      "step": 70656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8834713629719805e-05,
+      "loss": 4.8297,
+      "step": 71168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.882632768220928e-05,
+      "loss": 4.8287,
+      "step": 71680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.881794173469876e-05,
+      "loss": 4.809,
+      "step": 72192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.880955578718824e-05,
+      "loss": 4.8195,
+      "step": 72704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.880116983967772e-05,
+      "loss": 4.8045,
+      "step": 73216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.879281664977466e-05,
+      "loss": 4.7945,
+      "step": 73728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.878443070226414e-05,
+      "loss": 4.8131,
+      "step": 74240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.877604475475362e-05,
+      "loss": 4.8018,
+      "step": 74752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.87676588072431e-05,
+      "loss": 4.8021,
+      "step": 75264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.875927285973258e-05,
+      "loss": 4.8048,
+      "step": 75776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.875088691222206e-05,
+      "loss": 4.7824,
+      "step": 76288
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.7586212158203125,
+      "eval_runtime": 284.5791,
+      "eval_samples_per_second": 1340.896,
+      "eval_steps_per_second": 41.904,
+      "step": 76320
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.874250096471154e-05,
+      "loss": 4.7858,
+      "step": 76800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.873411501720102e-05,
+      "loss": 4.7817,
+      "step": 77312
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.872574544849423e-05,
+      "loss": 4.7995,
+      "step": 77824
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.871735950098371e-05,
+      "loss": 4.7787,
+      "step": 78336
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.870897355347319e-05,
+      "loss": 4.7864,
+      "step": 78848
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.870058760596267e-05,
+      "loss": 4.7659,
+      "step": 79360
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.869220165845215e-05,
+      "loss": 4.7714,
+      "step": 79872
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.868381571094163e-05,
+      "loss": 4.7606,
+      "step": 80384
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.867544614223484e-05,
+      "loss": 4.7651,
+      "step": 80896
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.866706019472432e-05,
+      "loss": 4.7658,
+      "step": 81408
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.86586742472138e-05,
+      "loss": 4.7685,
+      "step": 81920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.865028829970329e-05,
+      "loss": 4.768,
+      "step": 82432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8641918730996496e-05,
+      "loss": 4.7485,
+      "step": 82944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8633532783485976e-05,
+      "loss": 4.7474,
+      "step": 83456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8625146835975456e-05,
+      "loss": 4.7442,
+      "step": 83968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8616760888464936e-05,
+      "loss": 4.7355,
+      "step": 84480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8608374940954416e-05,
+      "loss": 4.7497,
+      "step": 84992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8599988993443896e-05,
+      "loss": 4.7337,
+      "step": 85504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8591603045933376e-05,
+      "loss": 4.7324,
+      "step": 86016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8583217098422856e-05,
+      "loss": 4.7522,
+      "step": 86528
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8574847529716065e-05,
+      "loss": 4.7305,
+      "step": 87040
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8566461582205545e-05,
+      "loss": 4.7309,
+      "step": 87552
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8558075634695025e-05,
+      "loss": 4.7267,
+      "step": 88064
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8549689687184505e-05,
+      "loss": 4.733,
+      "step": 88576
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8541303739673985e-05,
+      "loss": 4.7209,
+      "step": 89088
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8532917792163465e-05,
+      "loss": 4.7152,
+      "step": 89600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8524531844652945e-05,
+      "loss": 4.7121,
+      "step": 90112
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8516145897142425e-05,
+      "loss": 4.7203,
+      "step": 90624
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.850779270723937e-05,
+      "loss": 4.7009,
+      "step": 91136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.849940675972885e-05,
+      "loss": 4.7072,
+      "step": 91648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.849103719102206e-05,
+      "loss": 4.7081,
+      "step": 92160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.848265124351154e-05,
+      "loss": 4.7029,
+      "step": 92672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.847426529600102e-05,
+      "loss": 4.7069,
+      "step": 93184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.84658793484905e-05,
+      "loss": 4.6969,
+      "step": 93696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.845750977978371e-05,
+      "loss": 4.6981,
+      "step": 94208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8449123832273194e-05,
+      "loss": 4.6981,
+      "step": 94720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8440737884762674e-05,
+      "loss": 4.6748,
+      "step": 95232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8432351937252154e-05,
+      "loss": 4.6861,
+      "step": 95744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8423965989741634e-05,
+      "loss": 4.6701,
+      "step": 96256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8415580042231114e-05,
+      "loss": 4.6805,
+      "step": 96768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.840719409472059e-05,
+      "loss": 4.6811,
+      "step": 97280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.839880814721007e-05,
+      "loss": 4.684,
+      "step": 97792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.839042219969955e-05,
+      "loss": 4.6743,
+      "step": 98304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.838205263099276e-05,
+      "loss": 4.6728,
+      "step": 98816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8373666683482236e-05,
+      "loss": 4.68,
+      "step": 99328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8365280735971716e-05,
+      "loss": 4.6741,
+      "step": 99840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8356894788461196e-05,
+      "loss": 4.6687,
+      "step": 100352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.834852521975441e-05,
+      "loss": 4.6621,
+      "step": 100864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.834013927224389e-05,
+      "loss": 4.6491,
+      "step": 101376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.833175332473337e-05,
+      "loss": 4.6673,
+      "step": 101888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.832336737722285e-05,
+      "loss": 4.6602,
+      "step": 102400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.831498142971233e-05,
+      "loss": 4.6592,
+      "step": 102912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.830659548220181e-05,
+      "loss": 4.6472,
+      "step": 103424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.829820953469129e-05,
+      "loss": 4.6551,
+      "step": 103936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.82898399659845e-05,
+      "loss": 4.639,
+      "step": 104448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.828145401847398e-05,
+      "loss": 4.6596,
+      "step": 104960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.827306807096346e-05,
+      "loss": 4.6403,
+      "step": 105472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.826468212345294e-05,
+      "loss": 4.6485,
+      "step": 105984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.825629617594242e-05,
+      "loss": 4.6394,
+      "step": 106496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.824792660723563e-05,
+      "loss": 4.6318,
+      "step": 107008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.823954065972511e-05,
+      "loss": 4.6338,
+      "step": 107520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.82311547122146e-05,
+      "loss": 4.6388,
+      "step": 108032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8222768764704077e-05,
+      "loss": 4.6132,
+      "step": 108544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8214382817193557e-05,
+      "loss": 4.6228,
+      "step": 109056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8205996869683037e-05,
+      "loss": 4.6354,
+      "step": 109568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8197610922172516e-05,
+      "loss": 4.6222,
+      "step": 110080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8189224974661996e-05,
+      "loss": 4.6159,
+      "step": 110592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8180855405955206e-05,
+      "loss": 4.6117,
+      "step": 111104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8172485837248415e-05,
+      "loss": 4.611,
+      "step": 111616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8164099889737895e-05,
+      "loss": 4.6349,
+      "step": 112128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8155713942227375e-05,
+      "loss": 4.6248,
+      "step": 112640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8147327994716854e-05,
+      "loss": 4.6156,
+      "step": 113152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8138942047206334e-05,
+      "loss": 4.6214,
+      "step": 113664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8130556099695814e-05,
+      "loss": 4.6215,
+      "step": 114176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8122170152185294e-05,
+      "loss": 4.6148,
+      "step": 114688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8113784204674774e-05,
+      "loss": 4.5982,
+      "step": 115200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8105398257164254e-05,
+      "loss": 4.6119,
+      "step": 115712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8097012309653734e-05,
+      "loss": 4.6027,
+      "step": 116224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8088626362143214e-05,
+      "loss": 4.5993,
+      "step": 116736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8080240414632694e-05,
+      "loss": 4.6105,
+      "step": 117248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.80718708459259e-05,
+      "loss": 4.601,
+      "step": 117760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.806348489841538e-05,
+      "loss": 4.6082,
+      "step": 118272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.80551153297086e-05,
+      "loss": 4.6023,
+      "step": 118784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.804672938219807e-05,
+      "loss": 4.5807,
+      "step": 119296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.803834343468755e-05,
+      "loss": 4.596,
+      "step": 119808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.802995748717703e-05,
+      "loss": 4.5893,
+      "step": 120320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.802157153966652e-05,
+      "loss": 4.6019,
+      "step": 120832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8013185592156e-05,
+      "loss": 4.5912,
+      "step": 121344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.800479964464548e-05,
+      "loss": 4.5936,
+      "step": 121856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.799641369713496e-05,
+      "loss": 4.5934,
+      "step": 122368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.798802774962444e-05,
+      "loss": 4.5871,
+      "step": 122880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.797965818091765e-05,
+      "loss": 4.5908,
+      "step": 123392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.797127223340713e-05,
+      "loss": 4.5842,
+      "step": 123904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.796288628589661e-05,
+      "loss": 4.5716,
+      "step": 124416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.795451671718982e-05,
+      "loss": 4.5831,
+      "step": 124928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.79461307696793e-05,
+      "loss": 4.5824,
+      "step": 125440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.793774482216878e-05,
+      "loss": 4.5725,
+      "step": 125952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.792935887465826e-05,
+      "loss": 4.5788,
+      "step": 126464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.792097292714774e-05,
+      "loss": 4.5766,
+      "step": 126976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7912586979637217e-05,
+      "loss": 4.5869,
+      "step": 127488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.790421741093043e-05,
+      "loss": 4.5712,
+      "step": 128000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.789583146341991e-05,
+      "loss": 4.5642,
+      "step": 128512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.788744551590939e-05,
+      "loss": 4.5657,
+      "step": 129024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.787905956839887e-05,
+      "loss": 4.5637,
+      "step": 129536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.787067362088835e-05,
+      "loss": 4.5453,
+      "step": 130048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.786228767337783e-05,
+      "loss": 4.5597,
+      "step": 130560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.785391810467104e-05,
+      "loss": 4.558,
+      "step": 131072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.784553215716052e-05,
+      "loss": 4.5568,
+      "step": 131584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.783714620965e-05,
+      "loss": 4.5518,
+      "step": 132096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.782876026213948e-05,
+      "loss": 4.5443,
+      "step": 132608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.782037431462896e-05,
+      "loss": 4.5504,
+      "step": 133120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7811988367118434e-05,
+      "loss": 4.5619,
+      "step": 133632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.780360241960792e-05,
+      "loss": 4.555,
+      "step": 134144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.77952164720974e-05,
+      "loss": 4.555,
+      "step": 134656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.778683052458688e-05,
+      "loss": 4.5506,
+      "step": 135168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.777844457707636e-05,
+      "loss": 4.5615,
+      "step": 135680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.777007500836957e-05,
+      "loss": 4.5449,
+      "step": 136192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.776168906085905e-05,
+      "loss": 4.5401,
+      "step": 136704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.775331949215226e-05,
+      "loss": 4.548,
+      "step": 137216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.774493354464174e-05,
+      "loss": 4.5506,
+      "step": 137728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.773654759713122e-05,
+      "loss": 4.5301,
+      "step": 138240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.77281616496207e-05,
+      "loss": 4.5322,
+      "step": 138752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.771977570211018e-05,
+      "loss": 4.5385,
+      "step": 139264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.771138975459966e-05,
+      "loss": 4.5306,
+      "step": 139776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.770300380708914e-05,
+      "loss": 4.5345,
+      "step": 140288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.769461785957862e-05,
+      "loss": 4.5406,
+      "step": 140800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7686231912068105e-05,
+      "loss": 4.5222,
+      "step": 141312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7677845964557585e-05,
+      "loss": 4.5323,
+      "step": 141824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7669460017047065e-05,
+      "loss": 4.5197,
+      "step": 142336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7661074069536545e-05,
+      "loss": 4.5439,
+      "step": 142848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7652704500829754e-05,
+      "loss": 4.5354,
+      "step": 143360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7644318553319234e-05,
+      "loss": 4.5236,
+      "step": 143872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7635932605808714e-05,
+      "loss": 4.5264,
+      "step": 144384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7627546658298194e-05,
+      "loss": 4.5246,
+      "step": 144896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7619160710787674e-05,
+      "loss": 4.5323,
+      "step": 145408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.761077476327715e-05,
+      "loss": 4.5264,
+      "step": 145920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.760238881576663e-05,
+      "loss": 4.5218,
+      "step": 146432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.759400286825611e-05,
+      "loss": 4.5206,
+      "step": 146944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.758563329954932e-05,
+      "loss": 4.5192,
+      "step": 147456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.75772473520388e-05,
+      "loss": 4.5218,
+      "step": 147968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.756887778333202e-05,
+      "loss": 4.5086,
+      "step": 148480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.75604918358215e-05,
+      "loss": 4.5199,
+      "step": 148992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.755210588831097e-05,
+      "loss": 4.507,
+      "step": 149504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.754371994080045e-05,
+      "loss": 4.4974,
+      "step": 150016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.753535037209367e-05,
+      "loss": 4.5191,
+      "step": 150528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.752696442458315e-05,
+      "loss": 4.5092,
+      "step": 151040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.751857847707262e-05,
+      "loss": 4.5107,
+      "step": 151552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.75101925295621e-05,
+      "loss": 4.5138,
+      "step": 152064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.750180658205158e-05,
+      "loss": 4.4996,
+      "step": 152576
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.477122783660889,
+      "eval_runtime": 284.0973,
+      "eval_samples_per_second": 1343.17,
+      "eval_steps_per_second": 41.975,
+      "step": 152640
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.749342063454106e-05,
+      "loss": 4.5034,
+      "step": 153088
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.748503468703054e-05,
+      "loss": 4.4938,
+      "step": 153600
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.747664873952003e-05,
+      "loss": 4.5195,
+      "step": 154112
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.746826279200951e-05,
+      "loss": 4.4957,
+      "step": 154624
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.745987684449899e-05,
+      "loss": 4.5104,
+      "step": 155136
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.745149089698847e-05,
+      "loss": 4.492,
+      "step": 155648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.744310494947795e-05,
+      "loss": 4.4931,
+      "step": 156160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.743471900196743e-05,
+      "loss": 4.4856,
+      "step": 156672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.742633305445691e-05,
+      "loss": 4.4989,
+      "step": 157184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.741794710694639e-05,
+      "loss": 4.4967,
+      "step": 157696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.740956115943586e-05,
+      "loss": 4.4993,
+      "step": 158208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.740117521192534e-05,
+      "loss": 4.502,
+      "step": 158720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7392805643218556e-05,
+      "loss": 4.4836,
+      "step": 159232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7384419695708036e-05,
+      "loss": 4.4876,
+      "step": 159744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.737603374819751e-05,
+      "loss": 4.482,
+      "step": 160256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.736764780068699e-05,
+      "loss": 4.4787,
+      "step": 160768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7359278231980205e-05,
+      "loss": 4.4878,
+      "step": 161280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7350892284469685e-05,
+      "loss": 4.4798,
+      "step": 161792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7342506336959165e-05,
+      "loss": 4.4774,
+      "step": 162304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7334120389448645e-05,
+      "loss": 4.4946,
+      "step": 162816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7325734441938125e-05,
+      "loss": 4.4796,
+      "step": 163328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7317348494427605e-05,
+      "loss": 4.4803,
+      "step": 163840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7308962546917085e-05,
+      "loss": 4.4793,
+      "step": 164352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7300576599406565e-05,
+      "loss": 4.4877,
+      "step": 164864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7292207030699774e-05,
+      "loss": 4.4737,
+      "step": 165376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7283821083189254e-05,
+      "loss": 4.4718,
+      "step": 165888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.727545151448246e-05,
+      "loss": 4.4699,
+      "step": 166400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.726706556697194e-05,
+      "loss": 4.4722,
+      "step": 166912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.725867961946143e-05,
+      "loss": 4.4626,
+      "step": 167424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.725029367195091e-05,
+      "loss": 4.4676,
+      "step": 167936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.724190772444039e-05,
+      "loss": 4.4688,
+      "step": 168448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.723352177692987e-05,
+      "loss": 4.4666,
+      "step": 168960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.722513582941935e-05,
+      "loss": 4.4743,
+      "step": 169472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.721674988190883e-05,
+      "loss": 4.4613,
+      "step": 169984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.720838031320204e-05,
+      "loss": 4.464,
+      "step": 170496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.719999436569152e-05,
+      "loss": 4.4636,
+      "step": 171008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7191608418181e-05,
+      "loss": 4.4486,
+      "step": 171520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.718322247067048e-05,
+      "loss": 4.4528,
+      "step": 172032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.717483652315996e-05,
+      "loss": 4.4447,
+      "step": 172544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.716645057564944e-05,
+      "loss": 4.4516,
+      "step": 173056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.715808100694265e-05,
+      "loss": 4.4593,
+      "step": 173568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.714969505943213e-05,
+      "loss": 4.4614,
+      "step": 174080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7141309111921614e-05,
+      "loss": 4.4513,
+      "step": 174592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7132923164411094e-05,
+      "loss": 4.451,
+      "step": 175104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7124537216900574e-05,
+      "loss": 4.4623,
+      "step": 175616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.711615126939005e-05,
+      "loss": 4.4477,
+      "step": 176128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.710778170068326e-05,
+      "loss": 4.4524,
+      "step": 176640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.709939575317274e-05,
+      "loss": 4.4473,
+      "step": 177152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.709100980566222e-05,
+      "loss": 4.4301,
+      "step": 177664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7082623858151696e-05,
+      "loss": 4.4518,
+      "step": 178176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7074237910641176e-05,
+      "loss": 4.4431,
+      "step": 178688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7065851963130656e-05,
+      "loss": 4.4462,
+      "step": 179200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7057466015620136e-05,
+      "loss": 4.4387,
+      "step": 179712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7049080068109616e-05,
+      "loss": 4.4435,
+      "step": 180224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.704071049940283e-05,
+      "loss": 4.426,
+      "step": 180736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.703232455189231e-05,
+      "loss": 4.4539,
+      "step": 181248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.702393860438179e-05,
+      "loss": 4.4293,
+      "step": 181760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.701555265687127e-05,
+      "loss": 4.4453,
+      "step": 182272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.700716670936075e-05,
+      "loss": 4.4311,
+      "step": 182784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.699878076185023e-05,
+      "loss": 4.4313,
+      "step": 183296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.699039481433971e-05,
+      "loss": 4.4273,
+      "step": 183808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.698202524563292e-05,
+      "loss": 4.4378,
+      "step": 184320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.69736392981224e-05,
+      "loss": 4.4115,
+      "step": 184832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.696525335061188e-05,
+      "loss": 4.4247,
+      "step": 185344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.695686740310136e-05,
+      "loss": 4.4331,
+      "step": 185856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.694848145559084e-05,
+      "loss": 4.4248,
+      "step": 186368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.694011188688405e-05,
+      "loss": 4.4185,
+      "step": 186880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.693172593937353e-05,
+      "loss": 4.4139,
+      "step": 187392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6923339991863017e-05,
+      "loss": 4.416,
+      "step": 187904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6914970423156226e-05,
+      "loss": 4.4412,
+      "step": 188416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6906584475645706e-05,
+      "loss": 4.4277,
+      "step": 188928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6898198528135186e-05,
+      "loss": 4.4208,
+      "step": 189440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6889812580624665e-05,
+      "loss": 4.4292,
+      "step": 189952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6881426633114145e-05,
+      "loss": 4.4301,
+      "step": 190464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6873040685603625e-05,
+      "loss": 4.425,
+      "step": 190976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6864654738093105e-05,
+      "loss": 4.4071,
+      "step": 191488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6856268790582585e-05,
+      "loss": 4.4241,
+      "step": 192000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.684788284307206e-05,
+      "loss": 4.4192,
+      "step": 192512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.683949689556154e-05,
+      "loss": 4.4086,
+      "step": 193024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.683111094805102e-05,
+      "loss": 4.4253,
+      "step": 193536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.68227250005405e-05,
+      "loss": 4.4154,
+      "step": 194048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6814355431833714e-05,
+      "loss": 4.4202,
+      "step": 194560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6805969484323194e-05,
+      "loss": 4.4221,
+      "step": 195072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6797583536812674e-05,
+      "loss": 4.4018,
+      "step": 195584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6789197589302154e-05,
+      "loss": 4.4128,
+      "step": 196096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6780811641791634e-05,
+      "loss": 4.4072,
+      "step": 196608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.677245845188858e-05,
+      "loss": 4.417,
+      "step": 197120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.676407250437806e-05,
+      "loss": 4.4162,
+      "step": 197632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.675568655686753e-05,
+      "loss": 4.4116,
+      "step": 198144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.674730060935701e-05,
+      "loss": 4.414,
+      "step": 198656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.673891466184649e-05,
+      "loss": 4.41,
+      "step": 199168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.673052871433597e-05,
+      "loss": 4.4087,
+      "step": 199680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.672214276682545e-05,
+      "loss": 4.404,
+      "step": 200192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.671377319811867e-05,
+      "loss": 4.4042,
+      "step": 200704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.670538725060815e-05,
+      "loss": 4.4092,
+      "step": 201216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.669700130309763e-05,
+      "loss": 4.4033,
+      "step": 201728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.668861535558711e-05,
+      "loss": 4.3992,
+      "step": 202240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.668022940807659e-05,
+      "loss": 4.407,
+      "step": 202752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.66718598393698e-05,
+      "loss": 4.4057,
+      "step": 203264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.666347389185928e-05,
+      "loss": 4.418,
+      "step": 203776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.665508794434876e-05,
+      "loss": 4.3983,
+      "step": 204288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.664670199683824e-05,
+      "loss": 4.3957,
+      "step": 204800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.663831604932772e-05,
+      "loss": 4.3978,
+      "step": 205312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6629930101817197e-05,
+      "loss": 4.3955,
+      "step": 205824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6621544154306677e-05,
+      "loss": 4.3766,
+      "step": 206336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.661317458559989e-05,
+      "loss": 4.3916,
+      "step": 206848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.660478863808937e-05,
+      "loss": 4.3944,
+      "step": 207360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.659640269057885e-05,
+      "loss": 4.3881,
+      "step": 207872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.658801674306833e-05,
+      "loss": 4.3926,
+      "step": 208384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.657964717436154e-05,
+      "loss": 4.3813,
+      "step": 208896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.657126122685102e-05,
+      "loss": 4.3825,
+      "step": 209408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.65628752793405e-05,
+      "loss": 4.4015,
+      "step": 209920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.655448933182998e-05,
+      "loss": 4.3884,
+      "step": 210432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.654610338431946e-05,
+      "loss": 4.397,
+      "step": 210944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.653773381561267e-05,
+      "loss": 4.3886,
+      "step": 211456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.652934786810215e-05,
+      "loss": 4.402,
+      "step": 211968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.652096192059163e-05,
+      "loss": 4.387,
+      "step": 212480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.651257597308111e-05,
+      "loss": 4.3835,
+      "step": 212992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6504206404374326e-05,
+      "loss": 4.3867,
+      "step": 213504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6495820456863806e-05,
+      "loss": 4.3922,
+      "step": 214016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6487434509353286e-05,
+      "loss": 4.3747,
+      "step": 214528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6479048561842766e-05,
+      "loss": 4.3755,
+      "step": 215040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6470662614332246e-05,
+      "loss": 4.3776,
+      "step": 215552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6462293045625455e-05,
+      "loss": 4.3811,
+      "step": 216064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6453907098114935e-05,
+      "loss": 4.3804,
+      "step": 216576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6445521150604415e-05,
+      "loss": 4.3877,
+      "step": 217088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6437151581897624e-05,
+      "loss": 4.3674,
+      "step": 217600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6428765634387104e-05,
+      "loss": 4.3772,
+      "step": 218112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6420379686876584e-05,
+      "loss": 4.3676,
+      "step": 218624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6411993739366064e-05,
+      "loss": 4.3908,
+      "step": 219136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6403607791855544e-05,
+      "loss": 4.3882,
+      "step": 219648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6395221844345024e-05,
+      "loss": 4.3741,
+      "step": 220160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6386835896834504e-05,
+      "loss": 4.3715,
+      "step": 220672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6378449949323984e-05,
+      "loss": 4.3762,
+      "step": 221184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6370064001813464e-05,
+      "loss": 4.3806,
+      "step": 221696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6361678054302944e-05,
+      "loss": 4.3811,
+      "step": 222208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6353292106792424e-05,
+      "loss": 4.3708,
+      "step": 222720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6344906159281903e-05,
+      "loss": 4.3737,
+      "step": 223232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.633653659057511e-05,
+      "loss": 4.3724,
+      "step": 223744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.632815064306459e-05,
+      "loss": 4.378,
+      "step": 224256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.631976469555407e-05,
+      "loss": 4.363,
+      "step": 224768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.631137874804355e-05,
+      "loss": 4.3717,
+      "step": 225280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.630299280053303e-05,
+      "loss": 4.366,
+      "step": 225792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.629462323182625e-05,
+      "loss": 4.3498,
+      "step": 226304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.628623728431573e-05,
+      "loss": 4.3764,
+      "step": 226816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.627785133680521e-05,
+      "loss": 4.3654,
+      "step": 227328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.626946538929469e-05,
+      "loss": 4.368,
+      "step": 227840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.62610958205879e-05,
+      "loss": 4.372,
+      "step": 228352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.625270987307738e-05,
+      "loss": 4.3559,
+      "step": 228864
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.342620849609375,
+      "eval_runtime": 328.6628,
+      "eval_samples_per_second": 1161.041,
+      "eval_steps_per_second": 36.283,
+      "step": 228960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6244340304370586e-05,
+      "loss": 4.3637,
+      "step": 229376
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6235954356860066e-05,
+      "loss": 4.3516,
+      "step": 229888
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6227568409349546e-05,
+      "loss": 4.378,
+      "step": 230400
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6219182461839026e-05,
+      "loss": 4.3557,
+      "step": 230912
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6210796514328506e-05,
+      "loss": 4.3777,
+      "step": 231424
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6202410566817986e-05,
+      "loss": 4.3486,
+      "step": 231936
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6194024619307466e-05,
+      "loss": 4.3571,
+      "step": 232448
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6185638671796946e-05,
+      "loss": 4.3431,
+      "step": 232960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.617726910309016e-05,
+      "loss": 4.364,
+      "step": 233472
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.616888315557964e-05,
+      "loss": 4.3589,
+      "step": 233984
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.616049720806912e-05,
+      "loss": 4.3591,
+      "step": 234496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.61521112605586e-05,
+      "loss": 4.3727,
+      "step": 235008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.614374169185181e-05,
+      "loss": 4.3465,
+      "step": 235520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.613535574434129e-05,
+      "loss": 4.3538,
+      "step": 236032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.612696979683077e-05,
+      "loss": 4.3477,
+      "step": 236544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.611858384932025e-05,
+      "loss": 4.3456,
+      "step": 237056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6110197901809724e-05,
+      "loss": 4.3508,
+      "step": 237568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6101811954299204e-05,
+      "loss": 4.3495,
+      "step": 238080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6093426006788684e-05,
+      "loss": 4.3448,
+      "step": 238592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.608504005927817e-05,
+      "loss": 4.3603,
+      "step": 239104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.607667049057138e-05,
+      "loss": 4.3495,
+      "step": 239616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.606828454306086e-05,
+      "loss": 4.3477,
+      "step": 240128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.605989859555034e-05,
+      "loss": 4.3514,
+      "step": 240640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.605151264803982e-05,
+      "loss": 4.3551,
+      "step": 241152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.60431267005293e-05,
+      "loss": 4.3433,
+      "step": 241664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.603475713182251e-05,
+      "loss": 4.3427,
+      "step": 242176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.602637118431199e-05,
+      "loss": 4.3431,
+      "step": 242688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.601798523680147e-05,
+      "loss": 4.3384,
+      "step": 243200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.600959928929095e-05,
+      "loss": 4.3377,
+      "step": 243712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.600121334178043e-05,
+      "loss": 4.337,
+      "step": 244224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.599282739426991e-05,
+      "loss": 4.3473,
+      "step": 244736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.598444144675939e-05,
+      "loss": 4.3387,
+      "step": 245248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.597605549924887e-05,
+      "loss": 4.3484,
+      "step": 245760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.596766955173835e-05,
+      "loss": 4.3374,
+      "step": 246272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5959299983031564e-05,
+      "loss": 4.3368,
+      "step": 246784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.595093041432477e-05,
+      "loss": 4.3366,
+      "step": 247296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.594254446681425e-05,
+      "loss": 4.3289,
+      "step": 247808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.593415851930373e-05,
+      "loss": 4.3267,
+      "step": 248320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.592577257179321e-05,
+      "loss": 4.3212,
+      "step": 248832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.591740300308642e-05,
+      "loss": 4.3275,
+      "step": 249344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.59090170555759e-05,
+      "loss": 4.3352,
+      "step": 249856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.590063110806538e-05,
+      "loss": 4.3358,
+      "step": 250368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.589224516055486e-05,
+      "loss": 4.3318,
+      "step": 250880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.588385921304434e-05,
+      "loss": 4.3309,
+      "step": 251392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.587547326553382e-05,
+      "loss": 4.3412,
+      "step": 251904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.58670873180233e-05,
+      "loss": 4.327,
+      "step": 252416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.585870137051279e-05,
+      "loss": 4.331,
+      "step": 252928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.585031542300227e-05,
+      "loss": 4.3247,
+      "step": 253440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.584194585429548e-05,
+      "loss": 4.3122,
+      "step": 253952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.583355990678496e-05,
+      "loss": 4.3368,
+      "step": 254464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.582517395927444e-05,
+      "loss": 4.3211,
+      "step": 254976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.581678801176391e-05,
+      "loss": 4.3279,
+      "step": 255488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.580840206425339e-05,
+      "loss": 4.3179,
+      "step": 256000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.580001611674287e-05,
+      "loss": 4.3295,
+      "step": 256512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.579163016923235e-05,
+      "loss": 4.3061,
+      "step": 257024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.578324422172183e-05,
+      "loss": 4.3323,
+      "step": 257536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.577487465301504e-05,
+      "loss": 4.3158,
+      "step": 258048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5766488705504526e-05,
+      "loss": 4.3255,
+      "step": 258560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5758102757994006e-05,
+      "loss": 4.3204,
+      "step": 259072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5749716810483486e-05,
+      "loss": 4.3113,
+      "step": 259584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5741347241776695e-05,
+      "loss": 4.3115,
+      "step": 260096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.573297767306991e-05,
+      "loss": 4.3288,
+      "step": 260608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5724591725559384e-05,
+      "loss": 4.2992,
+      "step": 261120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5716205778048864e-05,
+      "loss": 4.3043,
+      "step": 261632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5707819830538344e-05,
+      "loss": 4.3198,
+      "step": 262144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5699433883027824e-05,
+      "loss": 4.3128,
+      "step": 262656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5691047935517304e-05,
+      "loss": 4.3036,
+      "step": 263168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.568267836681051e-05,
+      "loss": 4.3055,
+      "step": 263680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.567429241929999e-05,
+      "loss": 4.3026,
+      "step": 264192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.566590647178948e-05,
+      "loss": 4.3241,
+      "step": 264704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.565752052427896e-05,
+      "loss": 4.3143,
+      "step": 265216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.564915095557217e-05,
+      "loss": 4.3091,
+      "step": 265728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.564076500806165e-05,
+      "loss": 4.3166,
+      "step": 266240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.563237906055113e-05,
+      "loss": 4.3194,
+      "step": 266752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.562399311304061e-05,
+      "loss": 4.3181,
+      "step": 267264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.561560716553009e-05,
+      "loss": 4.2971,
+      "step": 267776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.56072375968233e-05,
+      "loss": 4.3113,
+      "step": 268288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.559886802811651e-05,
+      "loss": 4.3075,
+      "step": 268800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.559048208060599e-05,
+      "loss": 4.2986,
+      "step": 269312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.558209613309547e-05,
+      "loss": 4.3192,
+      "step": 269824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.557371018558495e-05,
+      "loss": 4.3041,
+      "step": 270336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5565324238074434e-05,
+      "loss": 4.3127,
+      "step": 270848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5556938290563914e-05,
+      "loss": 4.3135,
+      "step": 271360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5548552343053394e-05,
+      "loss": 4.2968,
+      "step": 271872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5540166395542874e-05,
+      "loss": 4.3043,
+      "step": 272384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.553179682683608e-05,
+      "loss": 4.3007,
+      "step": 272896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.552341087932556e-05,
+      "loss": 4.306,
+      "step": 273408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.551502493181504e-05,
+      "loss": 4.307,
+      "step": 273920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.550663898430452e-05,
+      "loss": 4.3039,
+      "step": 274432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5498253036794e-05,
+      "loss": 4.3074,
+      "step": 274944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.548986708928348e-05,
+      "loss": 4.3083,
+      "step": 275456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.548149752057669e-05,
+      "loss": 4.3017,
+      "step": 275968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.547311157306617e-05,
+      "loss": 4.3012,
+      "step": 276480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.546472562555565e-05,
+      "loss": 4.2992,
+      "step": 276992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.545633967804513e-05,
+      "loss": 4.3078,
+      "step": 277504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.544795373053462e-05,
+      "loss": 4.2985,
+      "step": 278016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.54395677830241e-05,
+      "loss": 4.2972,
+      "step": 278528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.543118183551358e-05,
+      "loss": 4.3025,
+      "step": 279040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.542279588800305e-05,
+      "loss": 4.2975,
+      "step": 279552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5414442698099996e-05,
+      "loss": 4.3149,
+      "step": 280064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5406056750589476e-05,
+      "loss": 4.2963,
+      "step": 280576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5397670803078956e-05,
+      "loss": 4.2964,
+      "step": 281088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5389284855568436e-05,
+      "loss": 4.2931,
+      "step": 281600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5380915286861645e-05,
+      "loss": 4.2962,
+      "step": 282112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5372529339351125e-05,
+      "loss": 4.2754,
+      "step": 282624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5364143391840605e-05,
+      "loss": 4.2877,
+      "step": 283136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5355757444330085e-05,
+      "loss": 4.2944,
+      "step": 283648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.534737149681957e-05,
+      "loss": 4.2899,
+      "step": 284160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.533900192811278e-05,
+      "loss": 4.2875,
+      "step": 284672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.533061598060226e-05,
+      "loss": 4.2821,
+      "step": 285184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.532223003309174e-05,
+      "loss": 4.2831,
+      "step": 285696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.531384408558122e-05,
+      "loss": 4.3043,
+      "step": 286208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5305458138070694e-05,
+      "loss": 4.285,
+      "step": 286720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.529708856936391e-05,
+      "loss": 4.2961,
+      "step": 287232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.528870262185339e-05,
+      "loss": 4.2903,
+      "step": 287744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.528031667434287e-05,
+      "loss": 4.3052,
+      "step": 288256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.527193072683234e-05,
+      "loss": 4.2912,
+      "step": 288768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.526354477932182e-05,
+      "loss": 4.2797,
+      "step": 289280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.525515883181131e-05,
+      "loss": 4.2912,
+      "step": 289792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.524677288430079e-05,
+      "loss": 4.2951,
+      "step": 290304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.523838693679027e-05,
+      "loss": 4.2758,
+      "step": 290816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.523001736808348e-05,
+      "loss": 4.2784,
+      "step": 291328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.522163142057296e-05,
+      "loss": 4.2834,
+      "step": 291840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5213278230669904e-05,
+      "loss": 4.2845,
+      "step": 292352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5204892283159384e-05,
+      "loss": 4.2796,
+      "step": 292864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5196506335648863e-05,
+      "loss": 4.2964,
+      "step": 293376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5188120388138343e-05,
+      "loss": 4.2689,
+      "step": 293888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5179734440627817e-05,
+      "loss": 4.2831,
+      "step": 294400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5171348493117297e-05,
+      "loss": 4.2709,
+      "step": 294912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5162962545606777e-05,
+      "loss": 4.2951,
+      "step": 295424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5154576598096256e-05,
+      "loss": 4.2923,
+      "step": 295936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.514620702938947e-05,
+      "loss": 4.282,
+      "step": 296448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.513782108187895e-05,
+      "loss": 4.2733,
+      "step": 296960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.512943513436843e-05,
+      "loss": 4.2861,
+      "step": 297472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.512104918685791e-05,
+      "loss": 4.2863,
+      "step": 297984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.511267961815112e-05,
+      "loss": 4.2851,
+      "step": 298496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.51042936706406e-05,
+      "loss": 4.281,
+      "step": 299008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.509590772313008e-05,
+      "loss": 4.2795,
+      "step": 299520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.508752177561956e-05,
+      "loss": 4.2799,
+      "step": 300032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5079168585716506e-05,
+      "loss": 4.2888,
+      "step": 300544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5070782638205986e-05,
+      "loss": 4.2702,
+      "step": 301056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5062396690695466e-05,
+      "loss": 4.2753,
+      "step": 301568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5054010743184946e-05,
+      "loss": 4.2727,
+      "step": 302080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5045624795674426e-05,
+      "loss": 4.2644,
+      "step": 302592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.503725522696764e-05,
+      "loss": 4.2813,
+      "step": 303104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5028869279457115e-05,
+      "loss": 4.2736,
+      "step": 303616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5020483331946595e-05,
+      "loss": 4.276,
+      "step": 304128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5012097384436075e-05,
+      "loss": 4.2774,
+      "step": 304640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5003711436925555e-05,
+      "loss": 4.2716,
+      "step": 305152
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.259642124176025,
+      "eval_runtime": 329.0937,
+      "eval_samples_per_second": 1159.521,
+      "eval_steps_per_second": 36.236,
+      "step": 305280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4995325489415035e-05,
+      "loss": 4.2743,
+      "step": 305664
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4986939541904515e-05,
+      "loss": 4.2592,
+      "step": 306176
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4978553594393995e-05,
+      "loss": 4.2854,
+      "step": 306688
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4970167646883475e-05,
+      "loss": 4.2676,
+      "step": 307200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4961781699372955e-05,
+      "loss": 4.2856,
+      "step": 307712
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4953395751862435e-05,
+      "loss": 4.2675,
+      "step": 308224
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4945009804351915e-05,
+      "loss": 4.2645,
+      "step": 308736
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4936623856841395e-05,
+      "loss": 4.2576,
+      "step": 309248
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.492823790933088e-05,
+      "loss": 4.2703,
+      "step": 309760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4919851961820354e-05,
+      "loss": 4.2739,
+      "step": 310272
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4911466014309834e-05,
+      "loss": 4.2716,
+      "step": 310784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4903080066799314e-05,
+      "loss": 4.2789,
+      "step": 311296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.489471049809253e-05,
+      "loss": 4.261,
+      "step": 311808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4886324550582003e-05,
+      "loss": 4.2647,
+      "step": 312320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4877938603071483e-05,
+      "loss": 4.26,
+      "step": 312832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.486955265556096e-05,
+      "loss": 4.254,
+      "step": 313344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.486116670805044e-05,
+      "loss": 4.2663,
+      "step": 313856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.485278076053992e-05,
+      "loss": 4.2651,
+      "step": 314368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.48443948130294e-05,
+      "loss": 4.2581,
+      "step": 314880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.483600886551888e-05,
+      "loss": 4.2715,
+      "step": 315392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.48276392968121e-05,
+      "loss": 4.2654,
+      "step": 315904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.481925334930158e-05,
+      "loss": 4.2642,
+      "step": 316416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.481086740179106e-05,
+      "loss": 4.2624,
+      "step": 316928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.480248145428054e-05,
+      "loss": 4.2702,
+      "step": 317440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.479409550677002e-05,
+      "loss": 4.2553,
+      "step": 317952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.47857095592595e-05,
+      "loss": 4.2649,
+      "step": 318464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.477732361174898e-05,
+      "loss": 4.2518,
+      "step": 318976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.476893766423846e-05,
+      "loss": 4.2563,
+      "step": 319488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.476056809553167e-05,
+      "loss": 4.2545,
+      "step": 320000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.475219852682488e-05,
+      "loss": 4.2552,
+      "step": 320512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4743828958118086e-05,
+      "loss": 4.2586,
+      "step": 321024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.473544301060757e-05,
+      "loss": 4.2581,
+      "step": 321536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.472705706309705e-05,
+      "loss": 4.2619,
+      "step": 322048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.471867111558653e-05,
+      "loss": 4.2554,
+      "step": 322560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.471028516807601e-05,
+      "loss": 4.2494,
+      "step": 323072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.470189922056549e-05,
+      "loss": 4.2566,
+      "step": 323584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.469351327305497e-05,
+      "loss": 4.2471,
+      "step": 324096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.468512732554445e-05,
+      "loss": 4.2418,
+      "step": 324608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.467674137803393e-05,
+      "loss": 4.2419,
+      "step": 325120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.466835543052341e-05,
+      "loss": 4.2428,
+      "step": 325632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.465996948301289e-05,
+      "loss": 4.2544,
+      "step": 326144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4651583535502366e-05,
+      "loss": 4.2563,
+      "step": 326656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.464323034559931e-05,
+      "loss": 4.2503,
+      "step": 327168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.463484439808879e-05,
+      "loss": 4.2469,
+      "step": 327680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.462645845057827e-05,
+      "loss": 4.2604,
+      "step": 328192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.461807250306775e-05,
+      "loss": 4.2491,
+      "step": 328704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.460968655555724e-05,
+      "loss": 4.2494,
+      "step": 329216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.460130060804672e-05,
+      "loss": 4.2433,
+      "step": 329728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.459291466053619e-05,
+      "loss": 4.2309,
+      "step": 330240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.458452871302567e-05,
+      "loss": 4.2574,
+      "step": 330752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4576159144318886e-05,
+      "loss": 4.2408,
+      "step": 331264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4567773196808366e-05,
+      "loss": 4.249,
+      "step": 331776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.455938724929784e-05,
+      "loss": 4.2422,
+      "step": 332288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.455100130178732e-05,
+      "loss": 4.2414,
+      "step": 332800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.45426153542768e-05,
+      "loss": 4.2338,
+      "step": 333312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.453422940676628e-05,
+      "loss": 4.2464,
+      "step": 333824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.452584345925576e-05,
+      "loss": 4.2381,
+      "step": 334336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4517473890548975e-05,
+      "loss": 4.2506,
+      "step": 334848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4509087943038455e-05,
+      "loss": 4.2421,
+      "step": 335360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4500701995527935e-05,
+      "loss": 4.2348,
+      "step": 335872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4492316048017415e-05,
+      "loss": 4.2306,
+      "step": 336384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4483946479310624e-05,
+      "loss": 4.2501,
+      "step": 336896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4475560531800104e-05,
+      "loss": 4.2249,
+      "step": 337408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4467174584289584e-05,
+      "loss": 4.227,
+      "step": 337920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4458788636779064e-05,
+      "loss": 4.2386,
+      "step": 338432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.445041906807227e-05,
+      "loss": 4.24,
+      "step": 338944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.444203312056175e-05,
+      "loss": 4.228,
+      "step": 339456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.443364717305123e-05,
+      "loss": 4.2267,
+      "step": 339968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.442526122554071e-05,
+      "loss": 4.225,
+      "step": 340480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.441689165683393e-05,
+      "loss": 4.244,
+      "step": 340992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.440850570932341e-05,
+      "loss": 4.2408,
+      "step": 341504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.440011976181289e-05,
+      "loss": 4.2361,
+      "step": 342016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.439173381430237e-05,
+      "loss": 4.2391,
+      "step": 342528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.438334786679185e-05,
+      "loss": 4.2414,
+      "step": 343040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.437496191928133e-05,
+      "loss": 4.2461,
+      "step": 343552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.436659235057454e-05,
+      "loss": 4.2254,
+      "step": 344064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.435820640306402e-05,
+      "loss": 4.2319,
+      "step": 344576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.43498204555535e-05,
+      "loss": 4.2343,
+      "step": 345088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.434143450804298e-05,
+      "loss": 4.2213,
+      "step": 345600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.433304856053246e-05,
+      "loss": 4.2467,
+      "step": 346112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.432466261302194e-05,
+      "loss": 4.2288,
+      "step": 346624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.431627666551142e-05,
+      "loss": 4.2375,
+      "step": 347136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.43078907180009e-05,
+      "loss": 4.2457,
+      "step": 347648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.429952114929411e-05,
+      "loss": 4.2228,
+      "step": 348160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.429113520178359e-05,
+      "loss": 4.2289,
+      "step": 348672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.42827656330768e-05,
+      "loss": 4.2248,
+      "step": 349184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.427437968556628e-05,
+      "loss": 4.2305,
+      "step": 349696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.426599373805576e-05,
+      "loss": 4.2333,
+      "step": 350208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.425762416934897e-05,
+      "loss": 4.2319,
+      "step": 350720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.424923822183845e-05,
+      "loss": 4.2344,
+      "step": 351232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.424085227432793e-05,
+      "loss": 4.2344,
+      "step": 351744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.423246632681741e-05,
+      "loss": 4.225,
+      "step": 352256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.422408037930689e-05,
+      "loss": 4.2303,
+      "step": 352768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.421569443179637e-05,
+      "loss": 4.229,
+      "step": 353280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.420730848428585e-05,
+      "loss": 4.2361,
+      "step": 353792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.419892253677533e-05,
+      "loss": 4.2275,
+      "step": 354304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.419053658926481e-05,
+      "loss": 4.2232,
+      "step": 354816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.418215064175429e-05,
+      "loss": 4.2306,
+      "step": 355328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.417376469424377e-05,
+      "loss": 4.2252,
+      "step": 355840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.416537874673325e-05,
+      "loss": 4.2457,
+      "step": 356352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.415700917802646e-05,
+      "loss": 4.2259,
+      "step": 356864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.414862323051594e-05,
+      "loss": 4.2248,
+      "step": 357376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.414023728300542e-05,
+      "loss": 4.2211,
+      "step": 357888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.41318513354949e-05,
+      "loss": 4.2282,
+      "step": 358400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.412348176678811e-05,
+      "loss": 4.2034,
+      "step": 358912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.411509581927759e-05,
+      "loss": 4.2148,
+      "step": 359424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.410670987176707e-05,
+      "loss": 4.2236,
+      "step": 359936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.409832392425655e-05,
+      "loss": 4.2239,
+      "step": 360448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.408993797674603e-05,
+      "loss": 4.2195,
+      "step": 360960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.4081552029235515e-05,
+      "loss": 4.2114,
+      "step": 361472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.4073166081724995e-05,
+      "loss": 4.2148,
+      "step": 361984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.4064780134214475e-05,
+      "loss": 4.2312,
+      "step": 362496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.4056394186703955e-05,
+      "loss": 4.2217,
+      "step": 363008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.404804099680089e-05,
+      "loss": 4.2239,
+      "step": 363520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.403965504929037e-05,
+      "loss": 4.2232,
+      "step": 364032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.403126910177985e-05,
+      "loss": 4.2332,
+      "step": 364544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.402288315426933e-05,
+      "loss": 4.2247,
+      "step": 365056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.401449720675881e-05,
+      "loss": 4.211,
+      "step": 365568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.400611125924829e-05,
+      "loss": 4.2195,
+      "step": 366080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.399772531173777e-05,
+      "loss": 4.225,
+      "step": 366592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.398933936422725e-05,
+      "loss": 4.209,
+      "step": 367104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.39809861743242e-05,
+      "loss": 4.2122,
+      "step": 367616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.397260022681368e-05,
+      "loss": 4.2125,
+      "step": 368128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.396421427930316e-05,
+      "loss": 4.2181,
+      "step": 368640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.395582833179264e-05,
+      "loss": 4.2138,
+      "step": 369152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.394744238428212e-05,
+      "loss": 4.2268,
+      "step": 369664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.393907281557533e-05,
+      "loss": 4.2003,
+      "step": 370176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.393068686806481e-05,
+      "loss": 4.2161,
+      "step": 370688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.392230092055429e-05,
+      "loss": 4.2026,
+      "step": 371200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.391391497304377e-05,
+      "loss": 4.2317,
+      "step": 371712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.390552902553325e-05,
+      "loss": 4.2166,
+      "step": 372224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.389714307802273e-05,
+      "loss": 4.2211,
+      "step": 372736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3888773509315936e-05,
+      "loss": 4.2044,
+      "step": 373248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.388038756180542e-05,
+      "loss": 4.2213,
+      "step": 373760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.38720016142949e-05,
+      "loss": 4.2264,
+      "step": 374272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.386361566678438e-05,
+      "loss": 4.2153,
+      "step": 374784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.385522971927386e-05,
+      "loss": 4.2118,
+      "step": 375296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3846843771763336e-05,
+      "loss": 4.2199,
+      "step": 375808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.383847420305655e-05,
+      "loss": 4.2114,
+      "step": 376320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.383008825554603e-05,
+      "loss": 4.2221,
+      "step": 376832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3821702308035505e-05,
+      "loss": 4.2019,
+      "step": 377344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3813316360524985e-05,
+      "loss": 4.2113,
+      "step": 377856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3804930413014465e-05,
+      "loss": 4.2061,
+      "step": 378368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.379656084430768e-05,
+      "loss": 4.2012,
+      "step": 378880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.378817489679716e-05,
+      "loss": 4.2141,
+      "step": 379392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3779805328090376e-05,
+      "loss": 4.209,
+      "step": 379904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3771419380579856e-05,
+      "loss": 4.2125,
+      "step": 380416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3763033433069336e-05,
+      "loss": 4.2124,
+      "step": 380928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.375464748555881e-05,
+      "loss": 4.2095,
+      "step": 381440
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.202969551086426,
+      "eval_runtime": 294.3955,
+      "eval_samples_per_second": 1296.185,
+      "eval_steps_per_second": 40.507,
+      "step": 381600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.374626153804829e-05,
+      "loss": 4.2062,
+      "step": 381952
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.373787559053777e-05,
+      "loss": 4.1931,
+      "step": 382464
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.372948964302725e-05,
+      "loss": 4.2216,
+      "step": 382976
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.372112007432046e-05,
+      "loss": 4.2062,
+      "step": 383488
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.371273412680994e-05,
+      "loss": 4.2176,
+      "step": 384000
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.370434817929942e-05,
+      "loss": 4.2059,
+      "step": 384512
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.36959622317889e-05,
+      "loss": 4.1969,
+      "step": 385024
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.368757628427838e-05,
+      "loss": 4.1945,
+      "step": 385536
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.367919033676786e-05,
+      "loss": 4.2054,
+      "step": 386048
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.3670804389257345e-05,
+      "loss": 4.2114,
+      "step": 386560
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.3662418441746825e-05,
+      "loss": 4.2082,
+      "step": 387072
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.3654048873040034e-05,
+      "loss": 4.2182,
+      "step": 387584
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.364567930433324e-05,
+      "loss": 4.1985,
+      "step": 388096
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.363729335682272e-05,
+      "loss": 4.2045,
+      "step": 388608
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.36289074093122e-05,
+      "loss": 4.1994,
+      "step": 389120
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.362052146180168e-05,
+      "loss": 4.1904,
+      "step": 389632
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.361213551429116e-05,
+      "loss": 4.2034,
+      "step": 390144
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.360374956678064e-05,
+      "loss": 4.2008,
+      "step": 390656
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.359536361927012e-05,
+      "loss": 4.1987,
+      "step": 391168
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.35869776717596e-05,
+      "loss": 4.2089,
+      "step": 391680
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.357859172424908e-05,
+      "loss": 4.2085,
+      "step": 392192
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.357020577673856e-05,
+      "loss": 4.2034,
+      "step": 392704
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.356181982922804e-05,
+      "loss": 4.1991,
+      "step": 393216
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.355343388171752e-05,
+      "loss": 4.2068,
+      "step": 393728
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.354506431301074e-05,
+      "loss": 4.1962,
+      "step": 394240
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.353667836550022e-05,
+      "loss": 4.2014,
+      "step": 394752
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.352829241798969e-05,
+      "loss": 4.1924,
+      "step": 395264
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.351990647047917e-05,
+      "loss": 4.1951,
+      "step": 395776
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.351153690177239e-05,
+      "loss": 4.1974,
+      "step": 396288
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.350315095426187e-05,
+      "loss": 4.1907,
+      "step": 396800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3494781385555076e-05,
+      "loss": 4.1982,
+      "step": 397312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3486395438044556e-05,
+      "loss": 4.199,
+      "step": 397824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3478009490534036e-05,
+      "loss": 4.1996,
+      "step": 398336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3469623543023516e-05,
+      "loss": 4.1946,
+      "step": 398848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3461237595512996e-05,
+      "loss": 4.1869,
+      "step": 399360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.345286802680621e-05,
+      "loss": 4.2016,
+      "step": 399872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.344448207929569e-05,
+      "loss": 4.1898,
+      "step": 400384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3436096131785165e-05,
+      "loss": 4.1754,
+      "step": 400896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3427710184274645e-05,
+      "loss": 4.1871,
+      "step": 401408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.341934061556786e-05,
+      "loss": 4.1821,
+      "step": 401920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.341095466805734e-05,
+      "loss": 4.1922,
+      "step": 402432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3402568720546814e-05,
+      "loss": 4.1983,
+      "step": 402944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3394182773036294e-05,
+      "loss": 4.1895,
+      "step": 403456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3385796825525774e-05,
+      "loss": 4.1928,
+      "step": 403968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3377410878015254e-05,
+      "loss": 4.1977,
+      "step": 404480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3369024930504734e-05,
+      "loss": 4.1861,
+      "step": 404992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3360638982994214e-05,
+      "loss": 4.1927,
+      "step": 405504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.335226941428743e-05,
+      "loss": 4.1866,
+      "step": 406016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.334389984558064e-05,
+      "loss": 4.1681,
+      "step": 406528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.333551389807012e-05,
+      "loss": 4.2004,
+      "step": 407040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.33271279505596e-05,
+      "loss": 4.1835,
+      "step": 407552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.331874200304908e-05,
+      "loss": 4.1962,
+      "step": 408064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.331035605553856e-05,
+      "loss": 4.1838,
+      "step": 408576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.330197010802804e-05,
+      "loss": 4.1814,
+      "step": 409088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.329358416051752e-05,
+      "loss": 4.1792,
+      "step": 409600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3285198213007e-05,
+      "loss": 4.1836,
+      "step": 410112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.327682864430021e-05,
+      "loss": 4.1843,
+      "step": 410624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.326844269678969e-05,
+      "loss": 4.1929,
+      "step": 411136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3260073128082904e-05,
+      "loss": 4.1842,
+      "step": 411648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3251687180572383e-05,
+      "loss": 4.1821,
+      "step": 412160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3243301233061863e-05,
+      "loss": 4.1678,
+      "step": 412672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.323493166435507e-05,
+      "loss": 4.1957,
+      "step": 413184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.322654571684455e-05,
+      "loss": 4.1668,
+      "step": 413696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.321815976933403e-05,
+      "loss": 4.1681,
+      "step": 414208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.320977382182351e-05,
+      "loss": 4.1825,
+      "step": 414720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.320138787431299e-05,
+      "loss": 4.1843,
+      "step": 415232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.319300192680247e-05,
+      "loss": 4.1668,
+      "step": 415744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.318461597929195e-05,
+      "loss": 4.1757,
+      "step": 416256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.317623003178143e-05,
+      "loss": 4.1636,
+      "step": 416768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.316784408427091e-05,
+      "loss": 4.1912,
+      "step": 417280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.315947451556412e-05,
+      "loss": 4.1843,
+      "step": 417792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.315108856805361e-05,
+      "loss": 4.1816,
+      "step": 418304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.314270262054309e-05,
+      "loss": 4.1797,
+      "step": 418816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.313431667303257e-05,
+      "loss": 4.1847,
+      "step": 419328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.312593072552205e-05,
+      "loss": 4.1929,
+      "step": 419840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.311754477801153e-05,
+      "loss": 4.1678,
+      "step": 420352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.310917520930474e-05,
+      "loss": 4.1815,
+      "step": 420864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.310078926179422e-05,
+      "loss": 4.1787,
+      "step": 421376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.30924033142837e-05,
+      "loss": 4.1649,
+      "step": 421888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.308401736677318e-05,
+      "loss": 4.1854,
+      "step": 422400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3075647798066386e-05,
+      "loss": 4.1804,
+      "step": 422912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3067261850555866e-05,
+      "loss": 4.1815,
+      "step": 423424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3058875903045346e-05,
+      "loss": 4.1927,
+      "step": 423936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3050489955534826e-05,
+      "loss": 4.1693,
+      "step": 424448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3042104008024306e-05,
+      "loss": 4.1748,
+      "step": 424960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3033718060513786e-05,
+      "loss": 4.1693,
+      "step": 425472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3025332113003266e-05,
+      "loss": 4.1754,
+      "step": 425984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3016962544296475e-05,
+      "loss": 4.1793,
+      "step": 426496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3008576596785955e-05,
+      "loss": 4.178,
+      "step": 427008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.3000190649275435e-05,
+      "loss": 4.1752,
+      "step": 427520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2991804701764915e-05,
+      "loss": 4.183,
+      "step": 428032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2983435133058124e-05,
+      "loss": 4.172,
+      "step": 428544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2975049185547604e-05,
+      "loss": 4.1747,
+      "step": 429056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2966663238037084e-05,
+      "loss": 4.1774,
+      "step": 429568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2958277290526564e-05,
+      "loss": 4.1801,
+      "step": 430080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2949891343016043e-05,
+      "loss": 4.1772,
+      "step": 430592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.294150539550553e-05,
+      "loss": 4.1663,
+      "step": 431104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.293311944799501e-05,
+      "loss": 4.1782,
+      "step": 431616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.292473350048449e-05,
+      "loss": 4.1737,
+      "step": 432128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.291638031058143e-05,
+      "loss": 4.1858,
+      "step": 432640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.290799436307091e-05,
+      "loss": 4.1736,
+      "step": 433152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.289960841556039e-05,
+      "loss": 4.1714,
+      "step": 433664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.289122246804987e-05,
+      "loss": 4.1665,
+      "step": 434176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.288283652053935e-05,
+      "loss": 4.1814,
+      "step": 434688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.287445057302883e-05,
+      "loss": 4.1496,
+      "step": 435200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.286606462551831e-05,
+      "loss": 4.1673,
+      "step": 435712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.285767867800779e-05,
+      "loss": 4.1702,
+      "step": 436224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2849309109301e-05,
+      "loss": 4.169,
+      "step": 436736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.284092316179048e-05,
+      "loss": 4.1663,
+      "step": 437248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2832537214279964e-05,
+      "loss": 4.1599,
+      "step": 437760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2824151266769444e-05,
+      "loss": 4.1655,
+      "step": 438272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2815765319258924e-05,
+      "loss": 4.1782,
+      "step": 438784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.280739575055213e-05,
+      "loss": 4.1723,
+      "step": 439296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.279900980304161e-05,
+      "loss": 4.1671,
+      "step": 439808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.279062385553109e-05,
+      "loss": 4.1748,
+      "step": 440320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.278223790802057e-05,
+      "loss": 4.1817,
+      "step": 440832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.277385196051005e-05,
+      "loss": 4.1731,
+      "step": 441344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.276546601299953e-05,
+      "loss": 4.1594,
+      "step": 441856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.275708006548901e-05,
+      "loss": 4.1685,
+      "step": 442368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2748694117978486e-05,
+      "loss": 4.1715,
+      "step": 442880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2740308170467966e-05,
+      "loss": 4.156,
+      "step": 443392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.273193860176118e-05,
+      "loss": 4.1597,
+      "step": 443904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.272355265425066e-05,
+      "loss": 4.1663,
+      "step": 444416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.271516670674014e-05,
+      "loss": 4.1639,
+      "step": 444928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.270679713803336e-05,
+      "loss": 4.1653,
+      "step": 445440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.269841119052284e-05,
+      "loss": 4.1735,
+      "step": 445952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.269002524301231e-05,
+      "loss": 4.1519,
+      "step": 446464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2681655674305526e-05,
+      "loss": 4.1642,
+      "step": 446976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2673269726795006e-05,
+      "loss": 4.156,
+      "step": 447488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2664883779284486e-05,
+      "loss": 4.177,
+      "step": 448000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.265649783177396e-05,
+      "loss": 4.1665,
+      "step": 448512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.264811188426344e-05,
+      "loss": 4.1694,
+      "step": 449024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2639742315556655e-05,
+      "loss": 4.1585,
+      "step": 449536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2631356368046135e-05,
+      "loss": 4.1701,
+      "step": 450048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2622970420535615e-05,
+      "loss": 4.1728,
+      "step": 450560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2614584473025095e-05,
+      "loss": 4.1647,
+      "step": 451072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.260621490431831e-05,
+      "loss": 4.1645,
+      "step": 451584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2597828956807784e-05,
+      "loss": 4.1689,
+      "step": 452096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2589443009297264e-05,
+      "loss": 4.1617,
+      "step": 452608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2581057061786744e-05,
+      "loss": 4.1737,
+      "step": 453120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2572671114276224e-05,
+      "loss": 4.1539,
+      "step": 453632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.256430154556943e-05,
+      "loss": 4.1643,
+      "step": 454144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.255591559805891e-05,
+      "loss": 4.1516,
+      "step": 454656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.254752965054839e-05,
+      "loss": 4.1538,
+      "step": 455168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.253914370303787e-05,
+      "loss": 4.1649,
+      "step": 455680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.253077413433109e-05,
+      "loss": 4.1551,
+      "step": 456192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.252238818682057e-05,
+      "loss": 4.1703,
+      "step": 456704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.251400223931005e-05,
+      "loss": 4.1538,
+      "step": 457216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.250561629179953e-05,
+      "loss": 4.1671,
+      "step": 457728
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.1620635986328125,
+      "eval_runtime": 293.9667,
+      "eval_samples_per_second": 1298.075,
+      "eval_steps_per_second": 40.566,
+      "step": 457920
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.249723034428901e-05,
+      "loss": 4.1695,
+      "step": 458240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.248884439677849e-05,
+      "loss": 4.1418,
+      "step": 458752
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.248045844926797e-05,
+      "loss": 4.1682,
+      "step": 459264
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.247207250175745e-05,
+      "loss": 4.1615,
+      "step": 459776
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.246368655424693e-05,
+      "loss": 4.1688,
+      "step": 460288
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.245530060673641e-05,
+      "loss": 4.1564,
+      "step": 460800
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.244691465922589e-05,
+      "loss": 4.1472,
+      "step": 461312
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.243852871171537e-05,
+      "loss": 4.1485,
+      "step": 461824
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.243014276420485e-05,
+      "loss": 4.1577,
+      "step": 462336
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.242175681669432e-05,
+      "loss": 4.1598,
+      "step": 462848
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.241337086918381e-05,
+      "loss": 4.1629,
+      "step": 463360
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.240498492167329e-05,
+      "loss": 4.167,
+      "step": 463872
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.23966153529665e-05,
+      "loss": 4.1559,
+      "step": 464384
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.238822940545598e-05,
+      "loss": 4.1568,
+      "step": 464896
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.237984345794546e-05,
+      "loss": 4.1494,
+      "step": 465408
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.237145751043494e-05,
+      "loss": 4.1398,
+      "step": 465920
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2363087941728146e-05,
+      "loss": 4.1587,
+      "step": 466432
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2354701994217626e-05,
+      "loss": 4.1511,
+      "step": 466944
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2346316046707106e-05,
+      "loss": 4.1509,
+      "step": 467456
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2337930099196586e-05,
+      "loss": 4.1594,
+      "step": 467968
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2329544151686066e-05,
+      "loss": 4.1613,
+      "step": 468480
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2321158204175546e-05,
+      "loss": 4.1559,
+      "step": 468992
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2312772256665026e-05,
+      "loss": 4.151,
+      "step": 469504
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2304386309154506e-05,
+      "loss": 4.1576,
+      "step": 470016
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.2296000361643986e-05,
+      "loss": 4.1535,
+      "step": 470528
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.228761441413347e-05,
+      "loss": 4.1542,
+      "step": 471040
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.227922846662295e-05,
+      "loss": 4.1444,
+      "step": 471552
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.227085889791616e-05,
+      "loss": 4.1492,
+      "step": 472064
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.226247295040564e-05,
+      "loss": 4.1508,
+      "step": 472576
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.225408700289512e-05,
+      "loss": 4.1473,
+      "step": 473088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.22457010553846e-05,
+      "loss": 4.1478,
+      "step": 473600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.223731510787408e-05,
+      "loss": 4.1512,
+      "step": 474112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.222892916036356e-05,
+      "loss": 4.1536,
+      "step": 474624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2220543212853035e-05,
+      "loss": 4.1516,
+      "step": 475136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2212157265342515e-05,
+      "loss": 4.1418,
+      "step": 475648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.220378769663573e-05,
+      "loss": 4.1523,
+      "step": 476160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.219540174912521e-05,
+      "loss": 4.1469,
+      "step": 476672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.218701580161469e-05,
+      "loss": 4.1332,
+      "step": 477184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.217862985410417e-05,
+      "loss": 4.1379,
+      "step": 477696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.217024390659365e-05,
+      "loss": 4.1366,
+      "step": 478208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.216185795908313e-05,
+      "loss": 4.1422,
+      "step": 478720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.215347201157261e-05,
+      "loss": 4.1505,
+      "step": 479232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.214508606406209e-05,
+      "loss": 4.1443,
+      "step": 479744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.21367164953553e-05,
+      "loss": 4.1511,
+      "step": 480256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.212833054784478e-05,
+      "loss": 4.1523,
+      "step": 480768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.211994460033426e-05,
+      "loss": 4.1393,
+      "step": 481280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.211157503162747e-05,
+      "loss": 4.145,
+      "step": 481792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.210318908411695e-05,
+      "loss": 4.1441,
+      "step": 482304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.209480313660643e-05,
+      "loss": 4.1195,
+      "step": 482816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.208641718909591e-05,
+      "loss": 4.1595,
+      "step": 483328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2078031241585395e-05,
+      "loss": 4.1365,
+      "step": 483840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2069645294074875e-05,
+      "loss": 4.154,
+      "step": 484352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2061275725368084e-05,
+      "loss": 4.134,
+      "step": 484864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2052889777857564e-05,
+      "loss": 4.1406,
+      "step": 485376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2044503830347044e-05,
+      "loss": 4.1348,
+      "step": 485888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2036117882836524e-05,
+      "loss": 4.1366,
+      "step": 486400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2027731935326004e-05,
+      "loss": 4.1403,
+      "step": 486912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.2019345987815484e-05,
+      "loss": 4.15,
+      "step": 487424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.201097641910869e-05,
+      "loss": 4.1456,
+      "step": 487936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.200259047159817e-05,
+      "loss": 4.1325,
+      "step": 488448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.199420452408765e-05,
+      "loss": 4.1247,
+      "step": 488960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.198581857657713e-05,
+      "loss": 4.1558,
+      "step": 489472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.197744900787035e-05,
+      "loss": 4.1223,
+      "step": 489984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.196906306035983e-05,
+      "loss": 4.1216,
+      "step": 490496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.196067711284931e-05,
+      "loss": 4.1405,
+      "step": 491008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.195229116533879e-05,
+      "loss": 4.1455,
+      "step": 491520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.194390521782827e-05,
+      "loss": 4.1211,
+      "step": 492032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.193553564912148e-05,
+      "loss": 4.1324,
+      "step": 492544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.192714970161096e-05,
+      "loss": 4.1235,
+      "step": 493056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.191876375410044e-05,
+      "loss": 4.1453,
+      "step": 493568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1910394185393647e-05,
+      "loss": 4.1419,
+      "step": 494080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1902008237883126e-05,
+      "loss": 4.1406,
+      "step": 494592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1893622290372606e-05,
+      "loss": 4.1322,
+      "step": 495104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1885236342862086e-05,
+      "loss": 4.1401,
+      "step": 495616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1876850395351566e-05,
+      "loss": 4.153,
+      "step": 496128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1868464447841046e-05,
+      "loss": 4.1203,
+      "step": 496640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1860078500330526e-05,
+      "loss": 4.1385,
+      "step": 497152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1851692552820006e-05,
+      "loss": 4.1382,
+      "step": 497664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1843306605309486e-05,
+      "loss": 4.123,
+      "step": 498176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1834920657798966e-05,
+      "loss": 4.14,
+      "step": 498688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1826534710288446e-05,
+      "loss": 4.139,
+      "step": 499200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1818148762777926e-05,
+      "loss": 4.1401,
+      "step": 499712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1809779194071135e-05,
+      "loss": 4.1501,
+      "step": 500224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1801393246560615e-05,
+      "loss": 4.1243,
+      "step": 500736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1793007299050095e-05,
+      "loss": 4.1315,
+      "step": 501248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1784621351539575e-05,
+      "loss": 4.1279,
+      "step": 501760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1776235404029055e-05,
+      "loss": 4.1343,
+      "step": 502272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.1767865835322264e-05,
+      "loss": 4.1341,
+      "step": 502784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.175947988781175e-05,
+      "loss": 4.1417,
+      "step": 503296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.175109394030123e-05,
+      "loss": 4.125,
+      "step": 503808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.174270799279071e-05,
+      "loss": 4.1448,
+      "step": 504320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.173432204528019e-05,
+      "loss": 4.1322,
+      "step": 504832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.17259524765734e-05,
+      "loss": 4.1347,
+      "step": 505344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.171756652906288e-05,
+      "loss": 4.136,
+      "step": 505856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.170918058155236e-05,
+      "loss": 4.1342,
+      "step": 506368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.170079463404184e-05,
+      "loss": 4.1399,
+      "step": 506880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.169240868653132e-05,
+      "loss": 4.124,
+      "step": 507392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.16840227390208e-05,
+      "loss": 4.1342,
+      "step": 507904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.167563679151028e-05,
+      "loss": 4.1355,
+      "step": 508416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.166726722280349e-05,
+      "loss": 4.1414,
+      "step": 508928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.165888127529297e-05,
+      "loss": 4.1353,
+      "step": 509440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1650511706586184e-05,
+      "loss": 4.1281,
+      "step": 509952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1642142137879393e-05,
+      "loss": 4.1277,
+      "step": 510464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1633756190368873e-05,
+      "loss": 4.137,
+      "step": 510976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1625370242858353e-05,
+      "loss": 4.1079,
+      "step": 511488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.161698429534783e-05,
+      "loss": 4.1246,
+      "step": 512000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.160859834783731e-05,
+      "loss": 4.127,
+      "step": 512512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.160021240032679e-05,
+      "loss": 4.1289,
+      "step": 513024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.159182645281627e-05,
+      "loss": 4.1234,
+      "step": 513536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.158344050530575e-05,
+      "loss": 4.1236,
+      "step": 514048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1575054557795226e-05,
+      "loss": 4.1248,
+      "step": 514560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1566668610284706e-05,
+      "loss": 4.1357,
+      "step": 515072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1558282662774186e-05,
+      "loss": 4.1335,
+      "step": 515584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.15499130940674e-05,
+      "loss": 4.1229,
+      "step": 516096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.154152714655688e-05,
+      "loss": 4.134,
+      "step": 516608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.153314119904636e-05,
+      "loss": 4.1422,
+      "step": 517120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.152475525153584e-05,
+      "loss": 4.1352,
+      "step": 517632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.151636930402532e-05,
+      "loss": 4.1195,
+      "step": 518144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.15079833565148e-05,
+      "loss": 4.1271,
+      "step": 518656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.149961378780801e-05,
+      "loss": 4.131,
+      "step": 519168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.149122784029749e-05,
+      "loss": 4.1145,
+      "step": 519680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.148284189278697e-05,
+      "loss": 4.1207,
+      "step": 520192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.147445594527645e-05,
+      "loss": 4.125,
+      "step": 520704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.146606999776593e-05,
+      "loss": 4.1204,
+      "step": 521216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.145768405025541e-05,
+      "loss": 4.1271,
+      "step": 521728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.144931448154863e-05,
+      "loss": 4.1334,
+      "step": 522240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.144092853403811e-05,
+      "loss": 4.1167,
+      "step": 522752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1432542586527587e-05,
+      "loss": 4.1189,
+      "step": 523264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1424173017820796e-05,
+      "loss": 4.1231,
+      "step": 523776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1415787070310276e-05,
+      "loss": 4.1338,
+      "step": 524288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1407401122799756e-05,
+      "loss": 4.1279,
+      "step": 524800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1399015175289236e-05,
+      "loss": 4.1323,
+      "step": 525312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1390629227778716e-05,
+      "loss": 4.1166,
+      "step": 525824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1382243280268195e-05,
+      "loss": 4.1313,
+      "step": 526336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1373857332757675e-05,
+      "loss": 4.132,
+      "step": 526848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1365471385247155e-05,
+      "loss": 4.125,
+      "step": 527360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1357085437736635e-05,
+      "loss": 4.1274,
+      "step": 527872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1348715869029844e-05,
+      "loss": 4.1279,
+      "step": 528384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1340329921519324e-05,
+      "loss": 4.1229,
+      "step": 528896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1331943974008804e-05,
+      "loss": 4.1348,
+      "step": 529408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.132355802649829e-05,
+      "loss": 4.1158,
+      "step": 529920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.131517207898777e-05,
+      "loss": 4.1238,
+      "step": 530432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1306786131477244e-05,
+      "loss": 4.1146,
+      "step": 530944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1298400183966724e-05,
+      "loss": 4.1144,
+      "step": 531456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.1290014236456204e-05,
+      "loss": 4.1225,
+      "step": 531968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.128164466774942e-05,
+      "loss": 4.1188,
+      "step": 532480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.127325872023889e-05,
+      "loss": 4.1289,
+      "step": 532992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.126487277272837e-05,
+      "loss": 4.1172,
+      "step": 533504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.125648682521785e-05,
+      "loss": 4.1296,
+      "step": 534016
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.131078243255615,
+      "eval_runtime": 352.6289,
+      "eval_samples_per_second": 1082.132,
+      "eval_steps_per_second": 33.817,
+      "step": 534240
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.124810087770733e-05,
+      "loss": 4.1146,
+      "step": 534528
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.123973130900054e-05,
+      "loss": 4.1048,
+      "step": 535040
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.123134536149003e-05,
+      "loss": 4.1285,
+      "step": 535552
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.122295941397951e-05,
+      "loss": 4.1207,
+      "step": 536064
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.121457346646899e-05,
+      "loss": 4.1286,
+      "step": 536576
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.120618751895847e-05,
+      "loss": 4.1201,
+      "step": 537088
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.119780157144795e-05,
+      "loss": 4.1126,
+      "step": 537600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.118943200274116e-05,
+      "loss": 4.1136,
+      "step": 538112
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.118104605523064e-05,
+      "loss": 4.1189,
+      "step": 538624
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.117266010772012e-05,
+      "loss": 4.1224,
+      "step": 539136
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.11642741602096e-05,
+      "loss": 4.1223,
+      "step": 539648
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.115588821269908e-05,
+      "loss": 4.1297,
+      "step": 540160
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.114751864399229e-05,
+      "loss": 4.1166,
+      "step": 540672
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.113913269648177e-05,
+      "loss": 4.1163,
+      "step": 541184
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.1130746748971247e-05,
+      "loss": 4.113,
+      "step": 541696
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.1122360801460727e-05,
+      "loss": 4.1024,
+      "step": 542208
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.111399123275394e-05,
+      "loss": 4.1205,
+      "step": 542720
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.110560528524342e-05,
+      "loss": 4.1151,
+      "step": 543232
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.10972193377329e-05,
+      "loss": 4.1133,
+      "step": 543744
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.108883339022238e-05,
+      "loss": 4.1145,
+      "step": 544256
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.108044744271186e-05,
+      "loss": 4.1301,
+      "step": 544768
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.107206149520134e-05,
+      "loss": 4.1179,
+      "step": 545280
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.106367554769082e-05,
+      "loss": 4.114,
+      "step": 545792
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.10552896001803e-05,
+      "loss": 4.1178,
+      "step": 546304
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.104692003147351e-05,
+      "loss": 4.1149,
+      "step": 546816
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.103853408396299e-05,
+      "loss": 4.1167,
+      "step": 547328
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.10301645152562e-05,
+      "loss": 4.1102,
+      "step": 547840
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.102177856774568e-05,
+      "loss": 4.1146,
+      "step": 548352
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.101339262023517e-05,
+      "loss": 4.1103,
+      "step": 548864
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.100500667272465e-05,
+      "loss": 4.1081,
+      "step": 549376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.099662072521413e-05,
+      "loss": 4.1092,
+      "step": 549888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.098823477770361e-05,
+      "loss": 4.1163,
+      "step": 550400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.097984883019308e-05,
+      "loss": 4.1166,
+      "step": 550912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.097146288268256e-05,
+      "loss": 4.1145,
+      "step": 551424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0963093313975776e-05,
+      "loss": 4.1067,
+      "step": 551936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0954723745268985e-05,
+      "loss": 4.1111,
+      "step": 552448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0946337797758465e-05,
+      "loss": 4.1112,
+      "step": 552960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0937951850247945e-05,
+      "loss": 4.1001,
+      "step": 553472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0929565902737425e-05,
+      "loss": 4.1003,
+      "step": 553984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0921179955226905e-05,
+      "loss": 4.0984,
+      "step": 554496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0912794007716385e-05,
+      "loss": 4.1109,
+      "step": 555008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0904408060205865e-05,
+      "loss": 4.1124,
+      "step": 555520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0896022112695345e-05,
+      "loss": 4.1053,
+      "step": 556032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0887636165184825e-05,
+      "loss": 4.1164,
+      "step": 556544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0879266596478034e-05,
+      "loss": 4.1129,
+      "step": 557056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0870880648967514e-05,
+      "loss": 4.1045,
+      "step": 557568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0862494701456994e-05,
+      "loss": 4.1115,
+      "step": 558080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.08541251327502e-05,
+      "loss": 4.1063,
+      "step": 558592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.084573918523968e-05,
+      "loss": 4.0857,
+      "step": 559104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.083735323772916e-05,
+      "loss": 4.1187,
+      "step": 559616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.082896729021864e-05,
+      "loss": 4.099,
+      "step": 560128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.082058134270812e-05,
+      "loss": 4.1189,
+      "step": 560640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.08121953951976e-05,
+      "loss": 4.1026,
+      "step": 561152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.080380944768708e-05,
+      "loss": 4.1031,
+      "step": 561664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.079542350017657e-05,
+      "loss": 4.0962,
+      "step": 562176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.078707031027351e-05,
+      "loss": 4.1005,
+      "step": 562688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.077868436276299e-05,
+      "loss": 4.1069,
+      "step": 563200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.077029841525247e-05,
+      "loss": 4.1105,
+      "step": 563712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.076191246774195e-05,
+      "loss": 4.1093,
+      "step": 564224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.075352652023143e-05,
+      "loss": 4.1006,
+      "step": 564736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0745156951524636e-05,
+      "loss": 4.0894,
+      "step": 565248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0736771004014116e-05,
+      "loss": 4.1195,
+      "step": 565760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0728385056503596e-05,
+      "loss": 4.09,
+      "step": 566272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0719999108993076e-05,
+      "loss": 4.0795,
+      "step": 566784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0711613161482556e-05,
+      "loss": 4.1043,
+      "step": 567296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0703227213972036e-05,
+      "loss": 4.1106,
+      "step": 567808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.069484126646152e-05,
+      "loss": 4.0898,
+      "step": 568320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0686455318951e-05,
+      "loss": 4.0984,
+      "step": 568832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.067810212904794e-05,
+      "loss": 4.0847,
+      "step": 569344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.066971618153742e-05,
+      "loss": 4.1079,
+      "step": 569856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.06613302340269e-05,
+      "loss": 4.1086,
+      "step": 570368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.065294428651638e-05,
+      "loss": 4.1028,
+      "step": 570880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.064455833900586e-05,
+      "loss": 4.0998,
+      "step": 571392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.063617239149534e-05,
+      "loss": 4.1046,
+      "step": 571904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.062778644398482e-05,
+      "loss": 4.1168,
+      "step": 572416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.061941687527803e-05,
+      "loss": 4.089,
+      "step": 572928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.061103092776751e-05,
+      "loss": 4.1054,
+      "step": 573440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.060264498025699e-05,
+      "loss": 4.0974,
+      "step": 573952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0594259032746476e-05,
+      "loss": 4.0935,
+      "step": 574464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0585873085235956e-05,
+      "loss": 4.1031,
+      "step": 574976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0577487137725436e-05,
+      "loss": 4.1072,
+      "step": 575488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.056910119021491e-05,
+      "loss": 4.1014,
+      "step": 576000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0560731621508125e-05,
+      "loss": 4.1173,
+      "step": 576512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0552345673997605e-05,
+      "loss": 4.0891,
+      "step": 577024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0543959726487085e-05,
+      "loss": 4.0927,
+      "step": 577536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.053557377897656e-05,
+      "loss": 4.0953,
+      "step": 578048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0527204210269774e-05,
+      "loss": 4.1024,
+      "step": 578560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0518818262759254e-05,
+      "loss": 4.099,
+      "step": 579072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.0510432315248734e-05,
+      "loss": 4.1074,
+      "step": 579584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0502046367738214e-05,
+      "loss": 4.0901,
+      "step": 580096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0493660420227694e-05,
+      "loss": 4.1099,
+      "step": 580608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0485274472717174e-05,
+      "loss": 4.1016,
+      "step": 581120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0476888525206654e-05,
+      "loss": 4.0943,
+      "step": 581632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0468502577696134e-05,
+      "loss": 4.1005,
+      "step": 582144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.046013300898934e-05,
+      "loss": 4.1041,
+      "step": 582656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.045174706147882e-05,
+      "loss": 4.1066,
+      "step": 583168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.04433611139683e-05,
+      "loss": 4.0921,
+      "step": 583680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.043497516645778e-05,
+      "loss": 4.0962,
+      "step": 584192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.042660559775099e-05,
+      "loss": 4.0993,
+      "step": 584704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.041823602904421e-05,
+      "loss": 4.1105,
+      "step": 585216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.040985008153368e-05,
+      "loss": 4.1041,
+      "step": 585728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.040146413402317e-05,
+      "loss": 4.0916,
+      "step": 586240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.039307818651265e-05,
+      "loss": 4.0955,
+      "step": 586752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.038469223900213e-05,
+      "loss": 4.1037,
+      "step": 587264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.037630629149161e-05,
+      "loss": 4.0734,
+      "step": 587776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.036792034398109e-05,
+      "loss": 4.0921,
+      "step": 588288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.035953439647057e-05,
+      "loss": 4.0932,
+      "step": 588800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.035114844896005e-05,
+      "loss": 4.0981,
+      "step": 589312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.034277888025326e-05,
+      "loss": 4.0881,
+      "step": 589824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.033439293274274e-05,
+      "loss": 4.0922,
+      "step": 590336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0326023364035946e-05,
+      "loss": 4.094,
+      "step": 590848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0317637416525426e-05,
+      "loss": 4.0976,
+      "step": 591360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0309251469014906e-05,
+      "loss": 4.0966,
+      "step": 591872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0300865521504386e-05,
+      "loss": 4.0929,
+      "step": 592384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0292479573993866e-05,
+      "loss": 4.1011,
+      "step": 592896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.028411000528708e-05,
+      "loss": 4.1086,
+      "step": 593408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.027572405777656e-05,
+      "loss": 4.1013,
+      "step": 593920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.026733811026604e-05,
+      "loss": 4.0867,
+      "step": 594432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.025895216275552e-05,
+      "loss": 4.0941,
+      "step": 594944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.025058259404873e-05,
+      "loss": 4.1008,
+      "step": 595456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.024219664653821e-05,
+      "loss": 4.0808,
+      "step": 595968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.023381069902769e-05,
+      "loss": 4.0898,
+      "step": 596480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.022542475151717e-05,
+      "loss": 4.0921,
+      "step": 596992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.021705518281038e-05,
+      "loss": 4.0897,
+      "step": 597504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.020866923529986e-05,
+      "loss": 4.0898,
+      "step": 598016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.020028328778934e-05,
+      "loss": 4.1013,
+      "step": 598528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.019189734027882e-05,
+      "loss": 4.0861,
+      "step": 599040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0183511392768306e-05,
+      "loss": 4.0877,
+      "step": 599552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0175141824061515e-05,
+      "loss": 4.0863,
+      "step": 600064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0166755876550995e-05,
+      "loss": 4.1053,
+      "step": 600576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0158369929040475e-05,
+      "loss": 4.0915,
+      "step": 601088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0149983981529955e-05,
+      "loss": 4.0998,
+      "step": 601600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0141598034019435e-05,
+      "loss": 4.0842,
+      "step": 602112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0133228465312644e-05,
+      "loss": 4.0977,
+      "step": 602624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0124842517802124e-05,
+      "loss": 4.1005,
+      "step": 603136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0116456570291604e-05,
+      "loss": 4.0951,
+      "step": 603648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0108070622781084e-05,
+      "loss": 4.0944,
+      "step": 604160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.0099684675270564e-05,
+      "loss": 4.0934,
+      "step": 604672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.009131510656377e-05,
+      "loss": 4.091,
+      "step": 605184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.008292915905325e-05,
+      "loss": 4.103,
+      "step": 605696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.007454321154274e-05,
+      "loss": 4.0853,
+      "step": 606208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.006615726403222e-05,
+      "loss": 4.0913,
+      "step": 606720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.005777131652169e-05,
+      "loss": 4.0854,
+      "step": 607232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.004938536901117e-05,
+      "loss": 4.0831,
+      "step": 607744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.004099942150065e-05,
+      "loss": 4.0853,
+      "step": 608256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.003261347399013e-05,
+      "loss": 4.0872,
+      "step": 608768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.002424390528334e-05,
+      "loss": 4.099,
+      "step": 609280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.001585795777282e-05,
+      "loss": 4.086,
+      "step": 609792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.00074720102623e-05,
+      "loss": 4.0958,
+      "step": 610304
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.106538772583008,
+      "eval_runtime": 282.9465,
+      "eval_samples_per_second": 1348.633,
+      "eval_steps_per_second": 42.146,
+      "step": 610560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.999908606275178e-05,
+      "loss": 4.0944,
+      "step": 610816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.999070011524126e-05,
+      "loss": 4.0762,
+      "step": 611328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.998231416773074e-05,
+      "loss": 4.0959,
+      "step": 611840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.997392822022022e-05,
+      "loss": 4.0914,
+      "step": 612352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.996554227270971e-05,
+      "loss": 4.0967,
+      "step": 612864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.995715632519919e-05,
+      "loss": 4.0881,
+      "step": 613376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.994877037768867e-05,
+      "loss": 4.0875,
+      "step": 613888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.994038443017815e-05,
+      "loss": 4.0768,
+      "step": 614400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.993199848266763e-05,
+      "loss": 4.0861,
+      "step": 614912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.992361253515711e-05,
+      "loss": 4.0941,
+      "step": 615424
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.991522658764658e-05,
+      "loss": 4.0881,
+      "step": 615936
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.990684064013606e-05,
+      "loss": 4.0952,
+      "step": 616448
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.989847107142928e-05,
+      "loss": 4.0947,
+      "step": 616960
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.989008512391876e-05,
+      "loss": 4.0782,
+      "step": 617472
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.988169917640823e-05,
+      "loss": 4.0786,
+      "step": 617984
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.987331322889771e-05,
+      "loss": 4.0764,
+      "step": 618496
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.986492728138719e-05,
+      "loss": 4.0884,
+      "step": 619008
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.985654133387668e-05,
+      "loss": 4.0852,
+      "step": 619520
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.984815538636616e-05,
+      "loss": 4.0799,
+      "step": 620032
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.983976943885564e-05,
+      "loss": 4.0859,
+      "step": 620544
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9831399870148846e-05,
+      "loss": 4.0979,
+      "step": 621056
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9823013922638326e-05,
+      "loss": 4.0894,
+      "step": 621568
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9814627975127806e-05,
+      "loss": 4.0858,
+      "step": 622080
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9806242027617286e-05,
+      "loss": 4.0839,
+      "step": 622592
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9797856080106766e-05,
+      "loss": 4.085,
+      "step": 623104
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9789470132596246e-05,
+      "loss": 4.086,
+      "step": 623616
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9781084185085726e-05,
+      "loss": 4.0752,
+      "step": 624128
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9772698237575205e-05,
+      "loss": 4.0899,
+      "step": 624640
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9764312290064685e-05,
+      "loss": 4.079,
+      "step": 625152
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.9755942721357895e-05,
+      "loss": 4.0745,
+      "step": 625664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9747556773847374e-05,
+      "loss": 4.0824,
+      "step": 626176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.973917082633686e-05,
+      "loss": 4.0805,
+      "step": 626688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.973080125763007e-05,
+      "loss": 4.0856,
+      "step": 627200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.972241531011955e-05,
+      "loss": 4.0872,
+      "step": 627712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.971402936260903e-05,
+      "loss": 4.0765,
+      "step": 628224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.970564341509851e-05,
+      "loss": 4.0805,
+      "step": 628736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.969725746758799e-05,
+      "loss": 4.0858,
+      "step": 629248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.968887152007747e-05,
+      "loss": 4.0652,
+      "step": 629760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.968048557256694e-05,
+      "loss": 4.0734,
+      "step": 630272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.967209962505642e-05,
+      "loss": 4.0709,
+      "step": 630784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.966374643515337e-05,
+      "loss": 4.0743,
+      "step": 631296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.965536048764285e-05,
+      "loss": 4.0803,
+      "step": 631808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.964697454013233e-05,
+      "loss": 4.0774,
+      "step": 632320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.963858859262181e-05,
+      "loss": 4.09,
+      "step": 632832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9630202645111295e-05,
+      "loss": 4.0774,
+      "step": 633344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.962181669760077e-05,
+      "loss": 4.079,
+      "step": 633856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9613447128893984e-05,
+      "loss": 4.0786,
+      "step": 634368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9605061181383464e-05,
+      "loss": 4.0783,
+      "step": 634880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9596675233872944e-05,
+      "loss": 4.0537,
+      "step": 635392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.958828928636242e-05,
+      "loss": 4.0883,
+      "step": 635904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.957991971765563e-05,
+      "loss": 4.0716,
+      "step": 636416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.957153377014511e-05,
+      "loss": 4.0865,
+      "step": 636928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.956314782263459e-05,
+      "loss": 4.0738,
+      "step": 637440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9554761875124066e-05,
+      "loss": 4.0757,
+      "step": 637952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9546375927613546e-05,
+      "loss": 4.0641,
+      "step": 638464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.953798998010303e-05,
+      "loss": 4.0726,
+      "step": 638976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.952960403259251e-05,
+      "loss": 4.0819,
+      "step": 639488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.952121808508199e-05,
+      "loss": 4.0736,
+      "step": 640000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.95128485163752e-05,
+      "loss": 4.0823,
+      "step": 640512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.950446256886468e-05,
+      "loss": 4.078,
+      "step": 641024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.949607662135416e-05,
+      "loss": 4.0542,
+      "step": 641536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.948769067384364e-05,
+      "loss": 4.0907,
+      "step": 642048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.947932110513685e-05,
+      "loss": 4.0642,
+      "step": 642560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9470951536430066e-05,
+      "loss": 4.0526,
+      "step": 643072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.946256558891954e-05,
+      "loss": 4.0782,
+      "step": 643584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.945417964140902e-05,
+      "loss": 4.0754,
+      "step": 644096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.94457936938985e-05,
+      "loss": 4.0602,
+      "step": 644608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9437407746387986e-05,
+      "loss": 4.0698,
+      "step": 645120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9429021798877466e-05,
+      "loss": 4.0594,
+      "step": 645632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9420652230170675e-05,
+      "loss": 4.076,
+      "step": 646144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9412266282660155e-05,
+      "loss": 4.0774,
+      "step": 646656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9403880335149635e-05,
+      "loss": 4.072,
+      "step": 647168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9395494387639115e-05,
+      "loss": 4.0731,
+      "step": 647680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9387108440128595e-05,
+      "loss": 4.0736,
+      "step": 648192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9378738871421804e-05,
+      "loss": 4.0867,
+      "step": 648704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9370352923911284e-05,
+      "loss": 4.0591,
+      "step": 649216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9361966976400764e-05,
+      "loss": 4.0744,
+      "step": 649728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9353581028890244e-05,
+      "loss": 4.0711,
+      "step": 650240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9345195081379724e-05,
+      "loss": 4.0663,
+      "step": 650752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9336809133869204e-05,
+      "loss": 4.0748,
+      "step": 651264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.9328423186358684e-05,
+      "loss": 4.0767,
+      "step": 651776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.932003723884817e-05,
+      "loss": 4.0751,
+      "step": 652288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.931166767014138e-05,
+      "loss": 4.0892,
+      "step": 652800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.930328172263086e-05,
+      "loss": 4.0607,
+      "step": 653312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.929489577512034e-05,
+      "loss": 4.0658,
+      "step": 653824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.928650982760982e-05,
+      "loss": 4.0665,
+      "step": 654336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.92781238800993e-05,
+      "loss": 4.0726,
+      "step": 654848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.926973793258878e-05,
+      "loss": 4.0683,
+      "step": 655360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.926136836388199e-05,
+      "loss": 4.0774,
+      "step": 655872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.925298241637147e-05,
+      "loss": 4.0666,
+      "step": 656384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.924459646886095e-05,
+      "loss": 4.0799,
+      "step": 656896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.923621052135042e-05,
+      "loss": 4.0734,
+      "step": 657408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.922782457383991e-05,
+      "loss": 4.0689,
+      "step": 657920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.921943862632939e-05,
+      "loss": 4.0732,
+      "step": 658432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.9211069057622604e-05,
+      "loss": 4.0722,
+      "step": 658944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.920268311011208e-05,
+      "loss": 4.0785,
+      "step": 659456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.919429716260156e-05,
+      "loss": 4.0652,
+      "step": 659968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.918591121509104e-05,
+      "loss": 4.0647,
+      "step": 660480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.917752526758052e-05,
+      "loss": 4.0741,
+      "step": 660992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.916913932007e-05,
+      "loss": 4.0824,
+      "step": 661504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.916075337255948e-05,
+      "loss": 4.0726,
+      "step": 662016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.9152383803852686e-05,
+      "loss": 4.0695,
+      "step": 662528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.9143997856342166e-05,
+      "loss": 4.0666,
+      "step": 663040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.9135611908831646e-05,
+      "loss": 4.0751,
+      "step": 663552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.912724234012486e-05,
+      "loss": 4.046,
+      "step": 664064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.911885639261434e-05,
+      "loss": 4.0666,
+      "step": 664576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.911047044510382e-05,
+      "loss": 4.0536,
+      "step": 665088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.91020844975933e-05,
+      "loss": 4.0709,
+      "step": 665600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.909369855008278e-05,
+      "loss": 4.0609,
+      "step": 666112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.908531260257226e-05,
+      "loss": 4.07,
+      "step": 666624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.907692665506174e-05,
+      "loss": 4.0636,
+      "step": 667136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.906854070755122e-05,
+      "loss": 4.0711,
+      "step": 667648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.906017113884443e-05,
+      "loss": 4.0685,
+      "step": 668160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.905178519133391e-05,
+      "loss": 4.0672,
+      "step": 668672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.904339924382339e-05,
+      "loss": 4.0749,
+      "step": 669184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.903501329631287e-05,
+      "loss": 4.078,
+      "step": 669696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.902662734880235e-05,
+      "loss": 4.0735,
+      "step": 670208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.901824140129183e-05,
+      "loss": 4.0642,
+      "step": 670720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.900985545378131e-05,
+      "loss": 4.0618,
+      "step": 671232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.9001485885074527e-05,
+      "loss": 4.0715,
+      "step": 671744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8993099937564007e-05,
+      "loss": 4.0581,
+      "step": 672256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8984713990053487e-05,
+      "loss": 4.06,
+      "step": 672768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8976328042542966e-05,
+      "loss": 4.0639,
+      "step": 673280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8967958473836176e-05,
+      "loss": 4.0625,
+      "step": 673792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8959572526325656e-05,
+      "loss": 4.0599,
+      "step": 674304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8951186578815135e-05,
+      "loss": 4.0746,
+      "step": 674816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8942800631304615e-05,
+      "loss": 4.0598,
+      "step": 675328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.893441468379409e-05,
+      "loss": 4.0616,
+      "step": 675840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.892602873628357e-05,
+      "loss": 4.0563,
+      "step": 676352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.891764278877305e-05,
+      "loss": 4.0763,
+      "step": 676864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.890925684126253e-05,
+      "loss": 4.0681,
+      "step": 677376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8900887272555744e-05,
+      "loss": 4.0676,
+      "step": 677888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8892501325045224e-05,
+      "loss": 4.0602,
+      "step": 678400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8884115377534704e-05,
+      "loss": 4.0731,
+      "step": 678912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8875729430024184e-05,
+      "loss": 4.072,
+      "step": 679424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8867343482513664e-05,
+      "loss": 4.0694,
+      "step": 679936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8858957535003144e-05,
+      "loss": 4.0692,
+      "step": 680448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8850571587492624e-05,
+      "loss": 4.0603,
+      "step": 680960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.8842185639982104e-05,
+      "loss": 4.0658,
+      "step": 681472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.883381607127531e-05,
+      "loss": 4.0765,
+      "step": 681984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.882543012376479e-05,
+      "loss": 4.0584,
+      "step": 682496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.881704417625427e-05,
+      "loss": 4.0645,
+      "step": 683008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.880865822874375e-05,
+      "loss": 4.0603,
+      "step": 683520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.880028866003696e-05,
+      "loss": 4.0578,
+      "step": 684032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.879190271252645e-05,
+      "loss": 4.0562,
+      "step": 684544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.878351676501593e-05,
+      "loss": 4.0623,
+      "step": 685056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.877514719630914e-05,
+      "loss": 4.0702,
+      "step": 685568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.876676124879862e-05,
+      "loss": 4.063,
+      "step": 686080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.87583753012881e-05,
+      "loss": 4.0672,
+      "step": 686592
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.087634563446045,
+      "eval_runtime": 302.5332,
+      "eval_samples_per_second": 1261.319,
+      "eval_steps_per_second": 39.417,
+      "step": 686880
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.874998935377758e-05,
+      "loss": 4.0545,
+      "step": 687104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.874160340626706e-05,
+      "loss": 4.0515,
+      "step": 687616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.873321745875654e-05,
+      "loss": 4.0678,
+      "step": 688128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.872483151124602e-05,
+      "loss": 4.0629,
+      "step": 688640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.87164455637355e-05,
+      "loss": 4.0729,
+      "step": 689152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.870805961622497e-05,
+      "loss": 4.0607,
+      "step": 689664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.869969004751819e-05,
+      "loss": 4.0601,
+      "step": 690176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8691304100007667e-05,
+      "loss": 4.0494,
+      "step": 690688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8682918152497147e-05,
+      "loss": 4.0611,
+      "step": 691200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8674532204986626e-05,
+      "loss": 4.0666,
+      "step": 691712
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8666146257476106e-05,
+      "loss": 4.062,
+      "step": 692224
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.865777668876932e-05,
+      "loss": 4.0693,
+      "step": 692736
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.864940712006253e-05,
+      "loss": 4.0738,
+      "step": 693248
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.864102117255201e-05,
+      "loss": 4.0497,
+      "step": 693760
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.863263522504149e-05,
+      "loss": 4.0532,
+      "step": 694272
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.862424927753097e-05,
+      "loss": 4.0493,
+      "step": 694784
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8615863330020444e-05,
+      "loss": 4.0649,
+      "step": 695296
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8607477382509924e-05,
+      "loss": 4.0535,
+      "step": 695808
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8599091434999404e-05,
+      "loss": 4.0551,
+      "step": 696320
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.8590705487488884e-05,
+      "loss": 4.0597,
+      "step": 696832
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.858231953997837e-05,
+      "loss": 4.0689,
+      "step": 697344
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.857393359246785e-05,
+      "loss": 4.0645,
+      "step": 697856
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.856554764495733e-05,
+      "loss": 4.0603,
+      "step": 698368
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.855716169744681e-05,
+      "loss": 4.0603,
+      "step": 698880
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.854877574993629e-05,
+      "loss": 4.0585,
+      "step": 699392
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.85404061812295e-05,
+      "loss": 4.0636,
+      "step": 699904
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.853203661252271e-05,
+      "loss": 4.0457,
+      "step": 700416
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.852365066501219e-05,
+      "loss": 4.0643,
+      "step": 700928
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.851526471750167e-05,
+      "loss": 4.053,
+      "step": 701440
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.850687876999115e-05,
+      "loss": 4.0496,
+      "step": 701952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.849850920128436e-05,
+      "loss": 4.0594,
+      "step": 702464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.849012325377384e-05,
+      "loss": 4.0568,
+      "step": 702976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.848173730626332e-05,
+      "loss": 4.055,
+      "step": 703488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8473351358752805e-05,
+      "loss": 4.066,
+      "step": 704000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8464965411242285e-05,
+      "loss": 4.0503,
+      "step": 704512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8456579463731765e-05,
+      "loss": 4.0557,
+      "step": 705024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8448193516221245e-05,
+      "loss": 4.0586,
+      "step": 705536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8439807568710724e-05,
+      "loss": 4.0362,
+      "step": 706048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8431421621200204e-05,
+      "loss": 4.0502,
+      "step": 706560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8423035673689684e-05,
+      "loss": 4.0461,
+      "step": 707072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8414649726179164e-05,
+      "loss": 4.0499,
+      "step": 707584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8406280157472373e-05,
+      "loss": 4.0573,
+      "step": 708096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8397894209961853e-05,
+      "loss": 4.0486,
+      "step": 708608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.838950826245133e-05,
+      "loss": 4.0622,
+      "step": 709120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8381122314940807e-05,
+      "loss": 4.0538,
+      "step": 709632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8372736367430287e-05,
+      "loss": 4.0567,
+      "step": 710144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.83643667987235e-05,
+      "loss": 4.0534,
+      "step": 710656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.835598085121299e-05,
+      "loss": 4.0522,
+      "step": 711168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.834759490370246e-05,
+      "loss": 4.034,
+      "step": 711680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.833920895619194e-05,
+      "loss": 4.06,
+      "step": 712192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.833082300868142e-05,
+      "loss": 4.0478,
+      "step": 712704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.832245343997464e-05,
+      "loss": 4.0637,
+      "step": 713216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.831406749246411e-05,
+      "loss": 4.0467,
+      "step": 713728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.830568154495359e-05,
+      "loss": 4.0502,
+      "step": 714240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.829729559744307e-05,
+      "loss": 4.0399,
+      "step": 714752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.828890964993255e-05,
+      "loss": 4.0476,
+      "step": 715264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.828054008122576e-05,
+      "loss": 4.0562,
+      "step": 715776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.827215413371524e-05,
+      "loss": 4.0472,
+      "step": 716288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.826376818620473e-05,
+      "loss": 4.0607,
+      "step": 716800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.825538223869421e-05,
+      "loss": 4.0508,
+      "step": 717312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.824699629118369e-05,
+      "loss": 4.0264,
+      "step": 717824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.823861034367317e-05,
+      "loss": 4.0638,
+      "step": 718336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.823022439616265e-05,
+      "loss": 4.0399,
+      "step": 718848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.822183844865213e-05,
+      "loss": 4.0309,
+      "step": 719360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8213468879945336e-05,
+      "loss": 4.0508,
+      "step": 719872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8205099311238545e-05,
+      "loss": 4.0527,
+      "step": 720384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8196713363728025e-05,
+      "loss": 4.0368,
+      "step": 720896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8188327416217505e-05,
+      "loss": 4.0437,
+      "step": 721408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8179941468706985e-05,
+      "loss": 4.0346,
+      "step": 721920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8171555521196465e-05,
+      "loss": 4.0516,
+      "step": 722432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.816318595248968e-05,
+      "loss": 4.0543,
+      "step": 722944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.815480000497916e-05,
+      "loss": 4.0447,
+      "step": 723456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.814641405746864e-05,
+      "loss": 4.0513,
+      "step": 723968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.813802810995812e-05,
+      "loss": 4.0484,
+      "step": 724480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.812965854125133e-05,
+      "loss": 4.0613,
+      "step": 724992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.812127259374081e-05,
+      "loss": 4.038,
+      "step": 725504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.811288664623029e-05,
+      "loss": 4.0486,
+      "step": 726016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.810450069871977e-05,
+      "loss": 4.0451,
+      "step": 726528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.809611475120925e-05,
+      "loss": 4.0433,
+      "step": 727040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.808774518250246e-05,
+      "loss": 4.0494,
+      "step": 727552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.807935923499194e-05,
+      "loss": 4.0478,
+      "step": 728064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.807097328748142e-05,
+      "loss": 4.054,
+      "step": 728576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.80625873399709e-05,
+      "loss": 4.0602,
+      "step": 729088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.805420139246038e-05,
+      "loss": 4.0428,
+      "step": 729600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.804581544494986e-05,
+      "loss": 4.0398,
+      "step": 730112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8037429497439345e-05,
+      "loss": 4.0409,
+      "step": 730624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8029043549928825e-05,
+      "loss": 4.0487,
+      "step": 731136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.80206576024183e-05,
+      "loss": 4.0428,
+      "step": 731648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.8012288033711514e-05,
+      "loss": 4.0473,
+      "step": 732160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8003902086200994e-05,
+      "loss": 4.0502,
+      "step": 732672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.799551613869047e-05,
+      "loss": 4.0514,
+      "step": 733184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.798713019117995e-05,
+      "loss": 4.0484,
+      "step": 733696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.797874424366943e-05,
+      "loss": 4.0438,
+      "step": 734208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.797035829615891e-05,
+      "loss": 4.0501,
+      "step": 734720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7961988727452116e-05,
+      "loss": 4.0504,
+      "step": 735232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7953602779941596e-05,
+      "loss": 4.0517,
+      "step": 735744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.794521683243108e-05,
+      "loss": 4.0387,
+      "step": 736256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.793683088492056e-05,
+      "loss": 4.0451,
+      "step": 736768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.792846131621377e-05,
+      "loss": 4.0478,
+      "step": 737280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.792007536870325e-05,
+      "loss": 4.0572,
+      "step": 737792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.791168942119273e-05,
+      "loss": 4.0484,
+      "step": 738304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.790330347368221e-05,
+      "loss": 4.047,
+      "step": 738816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.789491752617169e-05,
+      "loss": 4.0434,
+      "step": 739328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.788653157866117e-05,
+      "loss": 4.0481,
+      "step": 739840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.787814563115065e-05,
+      "loss": 4.0262,
+      "step": 740352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.786975968364013e-05,
+      "loss": 4.0387,
+      "step": 740864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.786140649373707e-05,
+      "loss": 4.0308,
+      "step": 741376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.785302054622655e-05,
+      "loss": 4.0469,
+      "step": 741888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7844634598716036e-05,
+      "loss": 4.0399,
+      "step": 742400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7836248651205516e-05,
+      "loss": 4.041,
+      "step": 742912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7827862703694996e-05,
+      "loss": 4.0412,
+      "step": 743424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7819476756184476e-05,
+      "loss": 4.0445,
+      "step": 743936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7811107187477685e-05,
+      "loss": 4.0479,
+      "step": 744448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7802721239967165e-05,
+      "loss": 4.0419,
+      "step": 744960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7794335292456645e-05,
+      "loss": 4.0495,
+      "step": 745472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7785949344946125e-05,
+      "loss": 4.0526,
+      "step": 745984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7777563397435605e-05,
+      "loss": 4.0451,
+      "step": 746496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7769177449925085e-05,
+      "loss": 4.0454,
+      "step": 747008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7760791502414565e-05,
+      "loss": 4.0348,
+      "step": 747520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7752405554904045e-05,
+      "loss": 4.0439,
+      "step": 748032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7744035986197254e-05,
+      "loss": 4.0425,
+      "step": 748544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7735650038686734e-05,
+      "loss": 4.0279,
+      "step": 749056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.772726409117622e-05,
+      "loss": 4.0398,
+      "step": 749568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.77188781436657e-05,
+      "loss": 4.0372,
+      "step": 750080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.771049219615518e-05,
+      "loss": 4.042,
+      "step": 750592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.770212262744839e-05,
+      "loss": 4.0484,
+      "step": 751104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.769373667993787e-05,
+      "loss": 4.0389,
+      "step": 751616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.768536711123108e-05,
+      "loss": 4.0385,
+      "step": 752128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.767698116372056e-05,
+      "loss": 4.0352,
+      "step": 752640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.766859521621004e-05,
+      "loss": 4.0518,
+      "step": 753152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.766020926869952e-05,
+      "loss": 4.044,
+      "step": 753664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7651823321189e-05,
+      "loss": 4.0455,
+      "step": 754176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.764343737367848e-05,
+      "loss": 4.0313,
+      "step": 754688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.763505142616796e-05,
+      "loss": 4.0543,
+      "step": 755200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.762666547865744e-05,
+      "loss": 4.0448,
+      "step": 755712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.761827953114692e-05,
+      "loss": 4.0459,
+      "step": 756224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.760990996244013e-05,
+      "loss": 4.0442,
+      "step": 756736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7601540393733344e-05,
+      "loss": 4.0401,
+      "step": 757248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7593154446222823e-05,
+      "loss": 4.0402,
+      "step": 757760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7584768498712303e-05,
+      "loss": 4.0587,
+      "step": 758272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.757638255120178e-05,
+      "loss": 4.0306,
+      "step": 758784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7567996603691257e-05,
+      "loss": 4.0426,
+      "step": 759296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7559610656180737e-05,
+      "loss": 4.0381,
+      "step": 759808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7551224708670216e-05,
+      "loss": 4.0298,
+      "step": 760320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.7542838761159696e-05,
+      "loss": 4.0355,
+      "step": 760832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.753446919245291e-05,
+      "loss": 4.039,
+      "step": 761344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.752608324494239e-05,
+      "loss": 4.0464,
+      "step": 761856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.75177136762356e-05,
+      "loss": 4.0419,
+      "step": 762368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.750932772872508e-05,
+      "loss": 4.0405,
+      "step": 762880
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.071054458618164,
+      "eval_runtime": 303.3292,
+      "eval_samples_per_second": 1258.009,
+      "eval_steps_per_second": 39.314,
+      "step": 763200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.750094178121456e-05,
+      "loss": 4.0402,
+      "step": 763392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.749255583370404e-05,
+      "loss": 4.0275,
+      "step": 763904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.748416988619352e-05,
+      "loss": 4.045,
+      "step": 764416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7475783938683e-05,
+      "loss": 4.0426,
+      "step": 764928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.746739799117248e-05,
+      "loss": 4.0486,
+      "step": 765440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.745901204366196e-05,
+      "loss": 4.0386,
+      "step": 765952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.745062609615144e-05,
+      "loss": 4.0357,
+      "step": 766464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.744224014864092e-05,
+      "loss": 4.0301,
+      "step": 766976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.74338542011304e-05,
+      "loss": 4.0332,
+      "step": 767488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.742546825361988e-05,
+      "loss": 4.0428,
+      "step": 768000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.741708230610936e-05,
+      "loss": 4.0393,
+      "step": 768512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.740871273740258e-05,
+      "loss": 4.0481,
+      "step": 769024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7400343168695786e-05,
+      "loss": 4.0475,
+      "step": 769536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7391957221185266e-05,
+      "loss": 4.0278,
+      "step": 770048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7383571273674746e-05,
+      "loss": 4.0285,
+      "step": 770560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7375185326164226e-05,
+      "loss": 4.0265,
+      "step": 771072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7366799378653706e-05,
+      "loss": 4.0448,
+      "step": 771584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7358413431143186e-05,
+      "loss": 4.0335,
+      "step": 772096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7350027483632666e-05,
+      "loss": 4.0313,
+      "step": 772608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.734164153612214e-05,
+      "loss": 4.034,
+      "step": 773120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.733325558861162e-05,
+      "loss": 4.0461,
+      "step": 773632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.73248696411011e-05,
+      "loss": 4.0384,
+      "step": 774144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.731648369359058e-05,
+      "loss": 4.0409,
+      "step": 774656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.730809774608006e-05,
+      "loss": 4.036,
+      "step": 775168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7299711798569545e-05,
+      "loss": 4.039,
+      "step": 775680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7291325851059025e-05,
+      "loss": 4.0383,
+      "step": 776192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7282939903548505e-05,
+      "loss": 4.0222,
+      "step": 776704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7274570334841714e-05,
+      "loss": 4.0418,
+      "step": 777216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7266184387331194e-05,
+      "loss": 4.0278,
+      "step": 777728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7257798439820674e-05,
+      "loss": 4.0241,
+      "step": 778240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7249412492310154e-05,
+      "loss": 4.0422,
+      "step": 778752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7241026544799634e-05,
+      "loss": 4.0334,
+      "step": 779264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7232640597289114e-05,
+      "loss": 4.0325,
+      "step": 779776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7224254649778594e-05,
+      "loss": 4.0421,
+      "step": 780288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7215868702268074e-05,
+      "loss": 4.0287,
+      "step": 780800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7207482754757554e-05,
+      "loss": 4.035,
+      "step": 781312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.719909680724703e-05,
+      "loss": 4.0351,
+      "step": 781824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7190710859736514e-05,
+      "loss": 4.0119,
+      "step": 782336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7182324912225994e-05,
+      "loss": 4.0279,
+      "step": 782848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.717395534351921e-05,
+      "loss": 4.0248,
+      "step": 783360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.716556939600868e-05,
+      "loss": 4.0245,
+      "step": 783872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.715718344849816e-05,
+      "loss": 4.0381,
+      "step": 784384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.714881387979138e-05,
+      "loss": 4.0276,
+      "step": 784896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.714042793228085e-05,
+      "loss": 4.0403,
+      "step": 785408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.713204198477033e-05,
+      "loss": 4.0335,
+      "step": 785920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.712365603725981e-05,
+      "loss": 4.0327,
+      "step": 786432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.711527008974929e-05,
+      "loss": 4.0318,
+      "step": 786944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.710688414223877e-05,
+      "loss": 4.0307,
+      "step": 787456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.709849819472825e-05,
+      "loss": 4.0118,
+      "step": 787968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.709011224721773e-05,
+      "loss": 4.0369,
+      "step": 788480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.708174267851095e-05,
+      "loss": 4.0277,
+      "step": 788992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7073373109804157e-05,
+      "loss": 4.0401,
+      "step": 789504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7064987162293637e-05,
+      "loss": 4.0254,
+      "step": 790016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7056601214783116e-05,
+      "loss": 4.0218,
+      "step": 790528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7048215267272596e-05,
+      "loss": 4.0221,
+      "step": 791040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7039829319762076e-05,
+      "loss": 4.024,
+      "step": 791552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7031443372251556e-05,
+      "loss": 4.0344,
+      "step": 792064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7023057424741036e-05,
+      "loss": 4.0216,
+      "step": 792576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7014671477230516e-05,
+      "loss": 4.0415,
+      "step": 793088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7006285529719996e-05,
+      "loss": 4.032,
+      "step": 793600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6997899582209476e-05,
+      "loss": 4.0017,
+      "step": 794112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6989513634698956e-05,
+      "loss": 4.0426,
+      "step": 794624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6981127687188436e-05,
+      "loss": 4.018,
+      "step": 795136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6972758118481645e-05,
+      "loss": 4.0096,
+      "step": 795648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.696437217097113e-05,
+      "loss": 4.0247,
+      "step": 796160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.695598622346061e-05,
+      "loss": 4.0327,
+      "step": 796672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.694760027595009e-05,
+      "loss": 4.0131,
+      "step": 797184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.69392307072433e-05,
+      "loss": 4.0231,
+      "step": 797696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.693084475973278e-05,
+      "loss": 4.0123,
+      "step": 798208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.692245881222226e-05,
+      "loss": 4.0241,
+      "step": 798720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.691407286471174e-05,
+      "loss": 4.0338,
+      "step": 799232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.690570329600495e-05,
+      "loss": 4.0229,
+      "step": 799744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.689731734849443e-05,
+      "loss": 4.0303,
+      "step": 800256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.688893140098391e-05,
+      "loss": 4.0247,
+      "step": 800768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.688054545347339e-05,
+      "loss": 4.0364,
+      "step": 801280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.687215950596287e-05,
+      "loss": 4.0179,
+      "step": 801792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6863789937256086e-05,
+      "loss": 4.0324,
+      "step": 802304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6855403989745566e-05,
+      "loss": 4.0209,
+      "step": 802816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6847034421038775e-05,
+      "loss": 4.0211,
+      "step": 803328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6838648473528255e-05,
+      "loss": 4.0277,
+      "step": 803840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6830262526017735e-05,
+      "loss": 4.0237,
+      "step": 804352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6821876578507214e-05,
+      "loss": 4.03,
+      "step": 804864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.681349063099669e-05,
+      "loss": 4.0419,
+      "step": 805376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.680510468348617e-05,
+      "loss": 4.0257,
+      "step": 805888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6796735114779383e-05,
+      "loss": 4.0156,
+      "step": 806400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6788349167268863e-05,
+      "loss": 4.0181,
+      "step": 806912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.677996321975834e-05,
+      "loss": 4.0296,
+      "step": 807424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.677157727224782e-05,
+      "loss": 4.0183,
+      "step": 807936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.676320770354104e-05,
+      "loss": 4.0316,
+      "step": 808448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.675482175603051e-05,
+      "loss": 4.0289,
+      "step": 808960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.674643580851999e-05,
+      "loss": 4.0281,
+      "step": 809472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.673804986100947e-05,
+      "loss": 4.0275,
+      "step": 809984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.672966391349895e-05,
+      "loss": 4.0204,
+      "step": 810496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.672127796598843e-05,
+      "loss": 4.0322,
+      "step": 811008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.671290839728164e-05,
+      "loss": 4.0262,
+      "step": 811520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.670452244977112e-05,
+      "loss": 4.0317,
+      "step": 812032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.669615288106434e-05,
+      "loss": 4.0218,
+      "step": 812544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.668776693355381e-05,
+      "loss": 4.0197,
+      "step": 813056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.667938098604329e-05,
+      "loss": 4.0261,
+      "step": 813568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.667099503853278e-05,
+      "loss": 4.0354,
+      "step": 814080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.666260909102226e-05,
+      "loss": 4.0285,
+      "step": 814592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.665422314351174e-05,
+      "loss": 4.0226,
+      "step": 815104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.664583719600122e-05,
+      "loss": 4.0268,
+      "step": 815616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.66374512484907e-05,
+      "loss": 4.0233,
+      "step": 816128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6629081679783906e-05,
+      "loss": 4.0112,
+      "step": 816640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6620695732273386e-05,
+      "loss": 4.0155,
+      "step": 817152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6612309784762866e-05,
+      "loss": 4.0108,
+      "step": 817664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6603923837252346e-05,
+      "loss": 4.0292,
+      "step": 818176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6595537889741826e-05,
+      "loss": 4.0194,
+      "step": 818688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6587151942231306e-05,
+      "loss": 4.0215,
+      "step": 819200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6578765994720786e-05,
+      "loss": 4.0165,
+      "step": 819712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6570396426013995e-05,
+      "loss": 4.0285,
+      "step": 820224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.656202685730721e-05,
+      "loss": 4.0243,
+      "step": 820736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.655364090979669e-05,
+      "loss": 4.0221,
+      "step": 821248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.654525496228617e-05,
+      "loss": 4.0298,
+      "step": 821760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.653686901477565e-05,
+      "loss": 4.0326,
+      "step": 822272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.652848306726513e-05,
+      "loss": 4.029,
+      "step": 822784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.652009711975461e-05,
+      "loss": 4.0225,
+      "step": 823296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.651171117224409e-05,
+      "loss": 4.0137,
+      "step": 823808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.65033416035373e-05,
+      "loss": 4.0245,
+      "step": 824320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.649495565602678e-05,
+      "loss": 4.0255,
+      "step": 824832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.648656970851626e-05,
+      "loss": 4.0026,
+      "step": 825344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.647818376100574e-05,
+      "loss": 4.0233,
+      "step": 825856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.646979781349522e-05,
+      "loss": 4.0202,
+      "step": 826368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.64614118659847e-05,
+      "loss": 4.0198,
+      "step": 826880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.645302591847418e-05,
+      "loss": 4.0303,
+      "step": 827392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.644463997096366e-05,
+      "loss": 4.0177,
+      "step": 827904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.643625402345314e-05,
+      "loss": 4.0191,
+      "step": 828416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.642788445474635e-05,
+      "loss": 4.0132,
+      "step": 828928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.641949850723583e-05,
+      "loss": 4.0346,
+      "step": 829440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.641111255972531e-05,
+      "loss": 4.022,
+      "step": 829952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.640272661221479e-05,
+      "loss": 4.0272,
+      "step": 830464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6394357043508e-05,
+      "loss": 4.0112,
+      "step": 830976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.638597109599748e-05,
+      "loss": 4.0289,
+      "step": 831488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.637758514848696e-05,
+      "loss": 4.0264,
+      "step": 832000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.636919920097644e-05,
+      "loss": 4.0267,
+      "step": 832512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.636081325346592e-05,
+      "loss": 4.0254,
+      "step": 833024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.63524273059554e-05,
+      "loss": 4.0167,
+      "step": 833536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.634404135844488e-05,
+      "loss": 4.0231,
+      "step": 834048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6335655410934364e-05,
+      "loss": 4.036,
+      "step": 834560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.63273022210313e-05,
+      "loss": 4.013,
+      "step": 835072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.631891627352078e-05,
+      "loss": 4.0205,
+      "step": 835584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6310546704814e-05,
+      "loss": 4.0175,
+      "step": 836096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.630216075730347e-05,
+      "loss": 4.0162,
+      "step": 836608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.629377480979295e-05,
+      "loss": 4.0128,
+      "step": 837120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.628538886228243e-05,
+      "loss": 4.0183,
+      "step": 837632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.627700291477191e-05,
+      "loss": 4.023,
+      "step": 838144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.626861696726139e-05,
+      "loss": 4.029,
+      "step": 838656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.626023101975087e-05,
+      "loss": 4.0207,
+      "step": 839168
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.0583062171936035,
+      "eval_runtime": 306.0123,
+      "eval_samples_per_second": 1246.979,
+      "eval_steps_per_second": 38.969,
+      "step": 839520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.625184507224035e-05,
+      "loss": 4.0097,
+      "step": 839680
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.624345912472983e-05,
+      "loss": 4.01,
+      "step": 840192
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.623507317721932e-05,
+      "loss": 4.0237,
+      "step": 840704
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.62266872297088e-05,
+      "loss": 4.024,
+      "step": 841216
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6218317661002006e-05,
+      "loss": 4.0296,
+      "step": 841728
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6209931713491486e-05,
+      "loss": 4.0218,
+      "step": 842240
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6201545765980966e-05,
+      "loss": 4.0187,
+      "step": 842752
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6193159818470446e-05,
+      "loss": 4.0104,
+      "step": 843264
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6184773870959926e-05,
+      "loss": 4.0151,
+      "step": 843776
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6176387923449406e-05,
+      "loss": 4.0162,
+      "step": 844288
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6168001975938886e-05,
+      "loss": 4.0259,
+      "step": 844800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.615961602842836e-05,
+      "loss": 4.028,
+      "step": 845312
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.615127921732904e-05,
+      "loss": 4.033,
+      "step": 845824
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.614289326981852e-05,
+      "loss": 4.004,
+      "step": 846336
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6134507322308e-05,
+      "loss": 4.017,
+      "step": 846848
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.612612137479748e-05,
+      "loss": 3.9999,
+      "step": 847360
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.611773542728696e-05,
+      "loss": 4.0252,
+      "step": 847872
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.610934947977644e-05,
+      "loss": 4.0152,
+      "step": 848384
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.610096353226592e-05,
+      "loss": 4.0083,
+      "step": 848896
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.60925775847554e-05,
+      "loss": 4.0151,
+      "step": 849408
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.608420801604861e-05,
+      "loss": 4.0322,
+      "step": 849920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.607582206853809e-05,
+      "loss": 4.0152,
+      "step": 850432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.606743612102757e-05,
+      "loss": 4.0227,
+      "step": 850944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.605905017351705e-05,
+      "loss": 4.0179,
+      "step": 851456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.605066422600653e-05,
+      "loss": 4.0196,
+      "step": 851968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.604227827849601e-05,
+      "loss": 4.0173,
+      "step": 852480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.603389233098549e-05,
+      "loss": 4.0068,
+      "step": 852992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.602550638347497e-05,
+      "loss": 4.0193,
+      "step": 853504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.601712043596445e-05,
+      "loss": 4.0143,
+      "step": 854016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.600873448845393e-05,
+      "loss": 4.0073,
+      "step": 854528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.600036491974714e-05,
+      "loss": 4.0178,
+      "step": 855040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.599197897223662e-05,
+      "loss": 4.0138,
+      "step": 855552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.59835930247261e-05,
+      "loss": 4.0151,
+      "step": 856064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.597520707721558e-05,
+      "loss": 4.0228,
+      "step": 856576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.596683750850879e-05,
+      "loss": 4.0093,
+      "step": 857088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.595845156099827e-05,
+      "loss": 4.015,
+      "step": 857600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5950065613487747e-05,
+      "loss": 4.0154,
+      "step": 858112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5941679665977227e-05,
+      "loss": 3.9944,
+      "step": 858624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5933293718466706e-05,
+      "loss": 4.0092,
+      "step": 859136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.592490777095619e-05,
+      "loss": 4.0036,
+      "step": 859648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.591652182344567e-05,
+      "loss": 4.0037,
+      "step": 860160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.590813587593515e-05,
+      "loss": 4.0174,
+      "step": 860672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.589976630722836e-05,
+      "loss": 4.0184,
+      "step": 861184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.589139673852157e-05,
+      "loss": 4.0128,
+      "step": 861696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.588301079101105e-05,
+      "loss": 4.0186,
+      "step": 862208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.587462484350053e-05,
+      "loss": 4.011,
+      "step": 862720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.586623889599001e-05,
+      "loss": 4.0134,
+      "step": 863232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.585785294847949e-05,
+      "loss": 4.0087,
+      "step": 863744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.584946700096897e-05,
+      "loss": 3.9971,
+      "step": 864256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.584108105345845e-05,
+      "loss": 4.0133,
+      "step": 864768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.583269510594793e-05,
+      "loss": 4.013,
+      "step": 865280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.582430915843741e-05,
+      "loss": 4.0205,
+      "step": 865792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.581593958973063e-05,
+      "loss": 4.0097,
+      "step": 866304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.580755364222011e-05,
+      "loss": 4.0064,
+      "step": 866816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.579916769470959e-05,
+      "loss": 4.0028,
+      "step": 867328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5790798126002796e-05,
+      "loss": 4.0024,
+      "step": 867840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5782412178492276e-05,
+      "loss": 4.0189,
+      "step": 868352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5774026230981756e-05,
+      "loss": 3.9998,
+      "step": 868864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5765640283471236e-05,
+      "loss": 4.0255,
+      "step": 869376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5757254335960716e-05,
+      "loss": 4.014,
+      "step": 869888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.574886838845019e-05,
+      "loss": 3.988,
+      "step": 870400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.574048244093967e-05,
+      "loss": 4.0178,
+      "step": 870912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.573209649342915e-05,
+      "loss": 4.0013,
+      "step": 871424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5723726924722365e-05,
+      "loss": 3.9926,
+      "step": 871936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5715340977211845e-05,
+      "loss": 4.008,
+      "step": 872448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5706955029701325e-05,
+      "loss": 4.0126,
+      "step": 872960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5698569082190804e-05,
+      "loss": 3.9959,
+      "step": 873472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.569019951348402e-05,
+      "loss": 4.0085,
+      "step": 873984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.568182994477723e-05,
+      "loss": 3.991,
+      "step": 874496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.567344399726671e-05,
+      "loss": 4.0069,
+      "step": 875008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.566505804975619e-05,
+      "loss": 4.016,
+      "step": 875520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.565667210224566e-05,
+      "loss": 4.0068,
+      "step": 876032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.564828615473514e-05,
+      "loss": 4.0124,
+      "step": 876544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.563990020722462e-05,
+      "loss": 4.0059,
+      "step": 877056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.56315142597141e-05,
+      "loss": 4.0202,
+      "step": 877568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.562312831220358e-05,
+      "loss": 4.0028,
+      "step": 878080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.561474236469306e-05,
+      "loss": 4.0122,
+      "step": 878592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.560635641718255e-05,
+      "loss": 4.0017,
+      "step": 879104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5598003227279494e-05,
+      "loss": 4.0047,
+      "step": 879616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.558961727976897e-05,
+      "loss": 4.0142,
+      "step": 880128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.558123133225845e-05,
+      "loss": 4.0023,
+      "step": 880640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.557284538474793e-05,
+      "loss": 4.0134,
+      "step": 881152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.556445943723741e-05,
+      "loss": 4.0259,
+      "step": 881664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.555607348972689e-05,
+      "loss": 4.0092,
+      "step": 882176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5547703921020096e-05,
+      "loss": 3.9928,
+      "step": 882688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.553933435231331e-05,
+      "loss": 4.0021,
+      "step": 883200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5530948404802785e-05,
+      "loss": 4.0102,
+      "step": 883712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.552256245729227e-05,
+      "loss": 4.0062,
+      "step": 884224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.551417650978175e-05,
+      "loss": 4.0092,
+      "step": 884736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.550579056227123e-05,
+      "loss": 4.0121,
+      "step": 885248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.549740461476071e-05,
+      "loss": 4.0078,
+      "step": 885760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.548901866725019e-05,
+      "loss": 4.0093,
+      "step": 886272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.548063271973967e-05,
+      "loss": 4.0044,
+      "step": 886784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.547224677222915e-05,
+      "loss": 4.0132,
+      "step": 887296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.546386082471863e-05,
+      "loss": 4.0095,
+      "step": 887808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.545547487720811e-05,
+      "loss": 4.019,
+      "step": 888320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.544710530850132e-05,
+      "loss": 3.9975,
+      "step": 888832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.543873573979453e-05,
+      "loss": 4.0093,
+      "step": 889344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.543034979228401e-05,
+      "loss": 4.0094,
+      "step": 889856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.542196384477349e-05,
+      "loss": 4.0127,
+      "step": 890368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.541357789726297e-05,
+      "loss": 4.0137,
+      "step": 890880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5405191949752456e-05,
+      "loss": 4.002,
+      "step": 891392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5396806002241936e-05,
+      "loss": 4.008,
+      "step": 891904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5388420054731416e-05,
+      "loss": 4.0049,
+      "step": 892416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5380034107220896e-05,
+      "loss": 4.0014,
+      "step": 892928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5371648159710376e-05,
+      "loss": 3.9937,
+      "step": 893440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5363262212199856e-05,
+      "loss": 3.9951,
+      "step": 893952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.535487626468933e-05,
+      "loss": 4.0098,
+      "step": 894464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5346506695982545e-05,
+      "loss": 4.0059,
+      "step": 894976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5338120748472025e-05,
+      "loss": 3.9987,
+      "step": 895488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.53297348009615e-05,
+      "loss": 3.9964,
+      "step": 896000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.532134885345098e-05,
+      "loss": 4.0128,
+      "step": 896512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.531296290594046e-05,
+      "loss": 4.0086,
+      "step": 897024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.530457695842994e-05,
+      "loss": 4.0031,
+      "step": 897536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5296191010919425e-05,
+      "loss": 4.0138,
+      "step": 898048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5287805063408905e-05,
+      "loss": 4.0168,
+      "step": 898560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5279419115898385e-05,
+      "loss": 4.0076,
+      "step": 899072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5271033168387865e-05,
+      "loss": 4.0024,
+      "step": 899584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5262663599681074e-05,
+      "loss": 3.9999,
+      "step": 900096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.525429403097428e-05,
+      "loss": 4.0022,
+      "step": 900608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.524590808346376e-05,
+      "loss": 4.014,
+      "step": 901120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.523752213595324e-05,
+      "loss": 3.9818,
+      "step": 901632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.522913618844272e-05,
+      "loss": 4.0066,
+      "step": 902144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.52207502409322e-05,
+      "loss": 4.0031,
+      "step": 902656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.521236429342168e-05,
+      "loss": 4.0046,
+      "step": 903168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.520399472471489e-05,
+      "loss": 4.0103,
+      "step": 903680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.519560877720437e-05,
+      "loss": 4.0013,
+      "step": 904192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.518722282969386e-05,
+      "loss": 4.0012,
+      "step": 904704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.517883688218334e-05,
+      "loss": 3.9926,
+      "step": 905216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.517046731347655e-05,
+      "loss": 4.0183,
+      "step": 905728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.516208136596603e-05,
+      "loss": 4.0028,
+      "step": 906240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.515369541845551e-05,
+      "loss": 4.0136,
+      "step": 906752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.514530947094499e-05,
+      "loss": 3.9938,
+      "step": 907264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.513692352343447e-05,
+      "loss": 4.0105,
+      "step": 907776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.512853757592395e-05,
+      "loss": 4.014,
+      "step": 908288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.512015162841343e-05,
+      "loss": 4.0045,
+      "step": 908800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.511176568090291e-05,
+      "loss": 4.01,
+      "step": 909312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5103396112196116e-05,
+      "loss": 4.0001,
+      "step": 909824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5095010164685596e-05,
+      "loss": 4.0036,
+      "step": 910336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5086624217175076e-05,
+      "loss": 4.017,
+      "step": 910848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5078238269664556e-05,
+      "loss": 4.0004,
+      "step": 911360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.506986870095777e-05,
+      "loss": 4.002,
+      "step": 911872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.506148275344725e-05,
+      "loss": 4.0019,
+      "step": 912384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.505309680593673e-05,
+      "loss": 3.9957,
+      "step": 912896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.504471085842621e-05,
+      "loss": 3.9969,
+      "step": 913408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5036324910915685e-05,
+      "loss": 4.0056,
+      "step": 913920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.50279553422089e-05,
+      "loss": 4.0008,
+      "step": 914432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.501956939469838e-05,
+      "loss": 4.0089,
+      "step": 914944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.501118344718786e-05,
+      "loss": 4.0077,
+      "step": 915456
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.047134876251221,
+      "eval_runtime": 296.5129,
+      "eval_samples_per_second": 1286.929,
+      "eval_steps_per_second": 40.217,
+      "step": 915840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.5002797499677334e-05,
+      "loss": 4.0215,
+      "step": 915968
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4994411552166814e-05,
+      "loss": 3.9893,
+      "step": 916480
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4986025604656294e-05,
+      "loss": 4.0026,
+      "step": 916992
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.497763965714578e-05,
+      "loss": 4.0104,
+      "step": 917504
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.496925370963526e-05,
+      "loss": 4.007,
+      "step": 918016
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.496086776212474e-05,
+      "loss": 4.0056,
+      "step": 918528
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.495248181461422e-05,
+      "loss": 4.0017,
+      "step": 919040
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.49440958671037e-05,
+      "loss": 3.9934,
+      "step": 919552
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.493570991959318e-05,
+      "loss": 3.9958,
+      "step": 920064
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.492732397208266e-05,
+      "loss": 4.0008,
+      "step": 920576
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.491893802457214e-05,
+      "loss": 4.0092,
+      "step": 921088
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.491055207706162e-05,
+      "loss": 4.009,
+      "step": 921600
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.490218250835483e-05,
+      "loss": 4.0221,
+      "step": 922112
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.489379656084431e-05,
+      "loss": 3.9845,
+      "step": 922624
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.488541061333379e-05,
+      "loss": 3.9964,
+      "step": 923136
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.487702466582327e-05,
+      "loss": 3.9838,
+      "step": 923648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.486863871831275e-05,
+      "loss": 4.0104,
+      "step": 924160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.486025277080223e-05,
+      "loss": 4.0006,
+      "step": 924672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.485186682329171e-05,
+      "loss": 3.9905,
+      "step": 925184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.484348087578119e-05,
+      "loss": 3.9965,
+      "step": 925696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.483509492827067e-05,
+      "loss": 4.0131,
+      "step": 926208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.482672535956388e-05,
+      "loss": 3.9987,
+      "step": 926720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.481833941205336e-05,
+      "loss": 4.0047,
+      "step": 927232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.480995346454284e-05,
+      "loss": 4.0018,
+      "step": 927744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.480156751703232e-05,
+      "loss": 4.005,
+      "step": 928256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.47931815695218e-05,
+      "loss": 3.9994,
+      "step": 928768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.478479562201128e-05,
+      "loss": 3.9895,
+      "step": 929280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.477640967450076e-05,
+      "loss": 4.0041,
+      "step": 929792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.476802372699024e-05,
+      "loss": 3.9905,
+      "step": 930304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.475963777947972e-05,
+      "loss": 3.9913,
+      "step": 930816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.475126821077293e-05,
+      "loss": 4.0023,
+      "step": 931328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4742882263262414e-05,
+      "loss": 3.9967,
+      "step": 931840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.473451269455562e-05,
+      "loss": 3.9972,
+      "step": 932352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.47261267470451e-05,
+      "loss": 4.0077,
+      "step": 932864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.471774079953458e-05,
+      "loss": 3.9941,
+      "step": 933376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.470935485202406e-05,
+      "loss": 3.9975,
+      "step": 933888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.470096890451354e-05,
+      "loss": 4.001,
+      "step": 934400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.469258295700302e-05,
+      "loss": 3.9764,
+      "step": 934912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.468421338829623e-05,
+      "loss": 3.9953,
+      "step": 935424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.467582744078571e-05,
+      "loss": 3.9852,
+      "step": 935936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.466744149327519e-05,
+      "loss": 3.99,
+      "step": 936448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.465905554576467e-05,
+      "loss": 3.998,
+      "step": 936960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.465066959825415e-05,
+      "loss": 4.0022,
+      "step": 937472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.464228365074363e-05,
+      "loss": 3.9964,
+      "step": 937984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.463391408203685e-05,
+      "loss": 4.0011,
+      "step": 938496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.462552813452633e-05,
+      "loss": 3.9973,
+      "step": 939008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.461714218701581e-05,
+      "loss": 3.994,
+      "step": 939520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.460875623950529e-05,
+      "loss": 3.9959,
+      "step": 940032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.460037029199476e-05,
+      "loss": 3.9782,
+      "step": 940544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.459198434448424e-05,
+      "loss": 3.9954,
+      "step": 941056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.458359839697372e-05,
+      "loss": 3.9993,
+      "step": 941568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.45752124494632e-05,
+      "loss": 4.0024,
+      "step": 942080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.456684288075641e-05,
+      "loss": 3.9941,
+      "step": 942592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.455845693324589e-05,
+      "loss": 3.9866,
+      "step": 943104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.455007098573537e-05,
+      "loss": 3.9845,
+      "step": 943616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4541701417028585e-05,
+      "loss": 3.9847,
+      "step": 944128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4533315469518065e-05,
+      "loss": 4.0037,
+      "step": 944640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4524929522007545e-05,
+      "loss": 3.9861,
+      "step": 945152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4516543574497025e-05,
+      "loss": 4.0065,
+      "step": 945664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4508174005790234e-05,
+      "loss": 3.9943,
+      "step": 946176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4499788058279714e-05,
+      "loss": 3.9786,
+      "step": 946688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4491402110769194e-05,
+      "loss": 3.9986,
+      "step": 947200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4483016163258674e-05,
+      "loss": 3.9855,
+      "step": 947712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.447464659455188e-05,
+      "loss": 3.978,
+      "step": 948224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.446626064704136e-05,
+      "loss": 3.9867,
+      "step": 948736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.445787469953084e-05,
+      "loss": 3.9993,
+      "step": 949248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.444948875202032e-05,
+      "loss": 3.9787,
+      "step": 949760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.44411028045098e-05,
+      "loss": 3.9952,
+      "step": 950272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.443271685699929e-05,
+      "loss": 3.9729,
+      "step": 950784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.44243472882925e-05,
+      "loss": 3.9895,
+      "step": 951296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.441596134078198e-05,
+      "loss": 4.0031,
+      "step": 951808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.440757539327146e-05,
+      "loss": 3.9928,
+      "step": 952320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.439918944576094e-05,
+      "loss": 3.9945,
+      "step": 952832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.439080349825042e-05,
+      "loss": 3.9897,
+      "step": 953344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.43824175507399e-05,
+      "loss": 4.0029,
+      "step": 953856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.437406436083684e-05,
+      "loss": 3.9877,
+      "step": 954368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.436567841332632e-05,
+      "loss": 3.997,
+      "step": 954880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.43572924658158e-05,
+      "loss": 3.9884,
+      "step": 955392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.434890651830528e-05,
+      "loss": 3.987,
+      "step": 955904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4340520570794757e-05,
+      "loss": 3.9949,
+      "step": 956416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.433213462328424e-05,
+      "loss": 3.9853,
+      "step": 956928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.432374867577372e-05,
+      "loss": 4.0004,
+      "step": 957440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.43153627282632e-05,
+      "loss": 4.0056,
+      "step": 957952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.430699315955641e-05,
+      "loss": 3.9967,
+      "step": 958464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.429860721204589e-05,
+      "loss": 3.9791,
+      "step": 958976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.429022126453537e-05,
+      "loss": 3.9855,
+      "step": 959488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.428183531702485e-05,
+      "loss": 3.9918,
+      "step": 960000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.427346574831806e-05,
+      "loss": 3.9921,
+      "step": 960512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.426507980080754e-05,
+      "loss": 3.9925,
+      "step": 961024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.425669385329702e-05,
+      "loss": 3.9963,
+      "step": 961536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.42483079057865e-05,
+      "loss": 3.9934,
+      "step": 962048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.423992195827598e-05,
+      "loss": 3.9947,
+      "step": 962560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.423153601076546e-05,
+      "loss": 3.9901,
+      "step": 963072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.422315006325494e-05,
+      "loss": 3.9975,
+      "step": 963584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.421476411574442e-05,
+      "loss": 3.9942,
+      "step": 964096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.420639454703764e-05,
+      "loss": 4.003,
+      "step": 964608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.419800859952712e-05,
+      "loss": 3.9825,
+      "step": 965120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.41896226520166e-05,
+      "loss": 3.9925,
+      "step": 965632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.418123670450607e-05,
+      "loss": 3.9949,
+      "step": 966144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4172867135799286e-05,
+      "loss": 3.9972,
+      "step": 966656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4164481188288766e-05,
+      "loss": 4.0007,
+      "step": 967168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4156111619581975e-05,
+      "loss": 3.9875,
+      "step": 967680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4147725672071455e-05,
+      "loss": 3.9927,
+      "step": 968192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4139339724560935e-05,
+      "loss": 3.9895,
+      "step": 968704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4130953777050415e-05,
+      "loss": 3.987,
+      "step": 969216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4122567829539895e-05,
+      "loss": 3.9783,
+      "step": 969728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4114181882029375e-05,
+      "loss": 3.9791,
+      "step": 970240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4105795934518855e-05,
+      "loss": 3.9931,
+      "step": 970752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.409742636581207e-05,
+      "loss": 3.9919,
+      "step": 971264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4089040418301544e-05,
+      "loss": 3.9839,
+      "step": 971776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4080654470791024e-05,
+      "loss": 3.9774,
+      "step": 972288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4072268523280504e-05,
+      "loss": 3.9979,
+      "step": 972800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4063882575769984e-05,
+      "loss": 3.9912,
+      "step": 973312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4055496628259463e-05,
+      "loss": 3.9899,
+      "step": 973824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4047110680748943e-05,
+      "loss": 3.9981,
+      "step": 974336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4038724733238423e-05,
+      "loss": 3.9999,
+      "step": 974848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.40303387857279e-05,
+      "loss": 3.991,
+      "step": 975360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.402198559582485e-05,
+      "loss": 3.9894,
+      "step": 975872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.401359964831433e-05,
+      "loss": 3.9867,
+      "step": 976384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.400521370080381e-05,
+      "loss": 3.9903,
+      "step": 976896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.399682775329329e-05,
+      "loss": 3.999,
+      "step": 977408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.398844180578277e-05,
+      "loss": 3.9667,
+      "step": 977920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.398005585827225e-05,
+      "loss": 3.9927,
+      "step": 978432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.397166991076173e-05,
+      "loss": 3.9846,
+      "step": 978944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.396328396325121e-05,
+      "loss": 3.9867,
+      "step": 979456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.395491439454442e-05,
+      "loss": 3.9946,
+      "step": 979968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.39465284470339e-05,
+      "loss": 3.9869,
+      "step": 980480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.393814249952338e-05,
+      "loss": 3.9841,
+      "step": 980992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3929772930816586e-05,
+      "loss": 3.9794,
+      "step": 981504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3921386983306066e-05,
+      "loss": 4.0018,
+      "step": 982016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.391300103579555e-05,
+      "loss": 3.9873,
+      "step": 982528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.390461508828503e-05,
+      "loss": 3.9951,
+      "step": 983040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.389622914077451e-05,
+      "loss": 3.982,
+      "step": 983552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.388784319326399e-05,
+      "loss": 3.9966,
+      "step": 984064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.387945724575347e-05,
+      "loss": 3.9954,
+      "step": 984576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.387107129824295e-05,
+      "loss": 3.9895,
+      "step": 985088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.386270172953616e-05,
+      "loss": 3.9929,
+      "step": 985600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.385431578202564e-05,
+      "loss": 3.9849,
+      "step": 986112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.384594621331885e-05,
+      "loss": 3.9924,
+      "step": 986624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.383756026580833e-05,
+      "loss": 4.0053,
+      "step": 987136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.382917431829781e-05,
+      "loss": 3.9853,
+      "step": 987648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.382078837078729e-05,
+      "loss": 3.9839,
+      "step": 988160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.381240242327677e-05,
+      "loss": 3.9901,
+      "step": 988672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.380401647576625e-05,
+      "loss": 3.9832,
+      "step": 989184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.379563052825573e-05,
+      "loss": 3.9782,
+      "step": 989696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3787260959548946e-05,
+      "loss": 3.9974,
+      "step": 990208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3778891390842155e-05,
+      "loss": 3.9815,
+      "step": 990720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3770505443331635e-05,
+      "loss": 3.9928,
+      "step": 991232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3762119495821115e-05,
+      "loss": 3.9936,
+      "step": 991744
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.038577079772949,
+      "eval_runtime": 293.4714,
+      "eval_samples_per_second": 1300.267,
+      "eval_steps_per_second": 40.634,
+      "step": 992160
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3753733548310595e-05,
+      "loss": 3.9803,
+      "step": 992256
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3745347600800075e-05,
+      "loss": 3.9769,
+      "step": 992768
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3736961653289555e-05,
+      "loss": 3.9841,
+      "step": 993280
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.372857570577903e-05,
+      "loss": 4.0005,
+      "step": 993792
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.372018975826851e-05,
+      "loss": 3.9918,
+      "step": 994304
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.371180381075799e-05,
+      "loss": 3.9935,
+      "step": 994816
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3703434242051204e-05,
+      "loss": 3.9829,
+      "step": 995328
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.369506467334442e-05,
+      "loss": 3.9816,
+      "step": 995840
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.36866787258339e-05,
+      "loss": 3.9799,
+      "step": 996352
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.367829277832338e-05,
+      "loss": 3.9928,
+      "step": 996864
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.366990683081285e-05,
+      "loss": 3.9879,
+      "step": 997376
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.366152088330233e-05,
+      "loss": 4.0008,
+      "step": 997888
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.365315131459555e-05,
+      "loss": 4.0012,
+      "step": 998400
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.364476536708503e-05,
+      "loss": 3.9721,
+      "step": 998912
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.36363794195745e-05,
+      "loss": 3.9826,
+      "step": 999424
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.362799347206398e-05,
+      "loss": 3.9708,
+      "step": 999936
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.361960752455346e-05,
+      "loss": 3.994,
+      "step": 1000448
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.361122157704294e-05,
+      "loss": 3.9836,
+      "step": 1000960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.360283562953243e-05,
+      "loss": 3.9798,
+      "step": 1001472
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.359444968202191e-05,
+      "loss": 3.9836,
+      "step": 1001984
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.358606373451139e-05,
+      "loss": 3.9946,
+      "step": 1002496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.35776941658046e-05,
+      "loss": 3.988,
+      "step": 1003008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.356930821829408e-05,
+      "loss": 3.988,
+      "step": 1003520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.356092227078356e-05,
+      "loss": 3.9882,
+      "step": 1004032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.355253632327304e-05,
+      "loss": 3.9882,
+      "step": 1004544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.354415037576252e-05,
+      "loss": 3.9884,
+      "step": 1005056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3535764428252e-05,
+      "loss": 3.9729,
+      "step": 1005568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.352739485954521e-05,
+      "loss": 3.9914,
+      "step": 1006080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3519008912034687e-05,
+      "loss": 3.9755,
+      "step": 1006592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3510622964524167e-05,
+      "loss": 3.9789,
+      "step": 1007104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3502237017013646e-05,
+      "loss": 3.987,
+      "step": 1007616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3493851069503126e-05,
+      "loss": 3.9798,
+      "step": 1008128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3485465121992606e-05,
+      "loss": 3.9841,
+      "step": 1008640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.347707917448209e-05,
+      "loss": 3.9891,
+      "step": 1009152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.34687096057753e-05,
+      "loss": 3.9835,
+      "step": 1009664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.346034003706851e-05,
+      "loss": 3.9875,
+      "step": 1010176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.345195408955799e-05,
+      "loss": 3.9834,
+      "step": 1010688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.344356814204747e-05,
+      "loss": 3.9641,
+      "step": 1011200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.343518219453695e-05,
+      "loss": 3.9823,
+      "step": 1011712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.342679624702643e-05,
+      "loss": 3.9688,
+      "step": 1012224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.341841029951591e-05,
+      "loss": 3.98,
+      "step": 1012736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3410024352005384e-05,
+      "loss": 3.9793,
+      "step": 1013248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3401638404494864e-05,
+      "loss": 3.9887,
+      "step": 1013760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.339326883578808e-05,
+      "loss": 3.9807,
+      "step": 1014272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.338488288827756e-05,
+      "loss": 3.9857,
+      "step": 1014784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.337649694076704e-05,
+      "loss": 3.9813,
+      "step": 1015296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3368127372060256e-05,
+      "loss": 3.983,
+      "step": 1015808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3359741424549736e-05,
+      "loss": 3.9796,
+      "step": 1016320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3351355477039216e-05,
+      "loss": 3.9655,
+      "step": 1016832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.334296952952869e-05,
+      "loss": 3.9791,
+      "step": 1017344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.333458358201817e-05,
+      "loss": 3.9871,
+      "step": 1017856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3326214013311385e-05,
+      "loss": 3.9891,
+      "step": 1018368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.331782806580086e-05,
+      "loss": 3.9782,
+      "step": 1018880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.330944211829034e-05,
+      "loss": 3.9748,
+      "step": 1019392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.330105617077982e-05,
+      "loss": 3.9728,
+      "step": 1019904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.32926702232693e-05,
+      "loss": 3.9682,
+      "step": 1020416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3284300654562514e-05,
+      "loss": 3.9974,
+      "step": 1020928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3275914707051994e-05,
+      "loss": 3.9684,
+      "step": 1021440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3267528759541474e-05,
+      "loss": 3.9916,
+      "step": 1021952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3259142812030954e-05,
+      "loss": 3.9798,
+      "step": 1022464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3250756864520434e-05,
+      "loss": 3.9646,
+      "step": 1022976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3242370917009914e-05,
+      "loss": 3.9828,
+      "step": 1023488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3233984969499393e-05,
+      "loss": 3.9713,
+      "step": 1024000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3225599021988873e-05,
+      "loss": 3.9656,
+      "step": 1024512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.321722945328208e-05,
+      "loss": 3.9713,
+      "step": 1025024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.320885988457529e-05,
+      "loss": 3.9837,
+      "step": 1025536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.320047393706477e-05,
+      "loss": 3.9649,
+      "step": 1026048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.319208798955425e-05,
+      "loss": 3.9801,
+      "step": 1026560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.318370204204374e-05,
+      "loss": 3.9622,
+      "step": 1027072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.317531609453322e-05,
+      "loss": 3.9729,
+      "step": 1027584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.31669301470227e-05,
+      "loss": 3.987,
+      "step": 1028096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.315854419951218e-05,
+      "loss": 3.9817,
+      "step": 1028608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.315015825200166e-05,
+      "loss": 3.9813,
+      "step": 1029120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.314177230449114e-05,
+      "loss": 3.9741,
+      "step": 1029632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.313338635698062e-05,
+      "loss": 3.9874,
+      "step": 1030144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.312501678827383e-05,
+      "loss": 3.973,
+      "step": 1030656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.311663084076331e-05,
+      "loss": 3.9803,
+      "step": 1031168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.310824489325279e-05,
+      "loss": 3.9761,
+      "step": 1031680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.309985894574227e-05,
+      "loss": 3.9747,
+      "step": 1032192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.309147299823175e-05,
+      "loss": 3.9808,
+      "step": 1032704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.308308705072122e-05,
+      "loss": 3.9738,
+      "step": 1033216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.307470110321071e-05,
+      "loss": 3.9831,
+      "step": 1033728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.306631515570019e-05,
+      "loss": 3.9922,
+      "step": 1034240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.30579455869934e-05,
+      "loss": 3.9793,
+      "step": 1034752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3049559639482876e-05,
+      "loss": 3.9655,
+      "step": 1035264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3041173691972356e-05,
+      "loss": 3.9717,
+      "step": 1035776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3032787744461836e-05,
+      "loss": 3.9795,
+      "step": 1036288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.302441817575505e-05,
+      "loss": 3.977,
+      "step": 1036800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3016032228244525e-05,
+      "loss": 3.9762,
+      "step": 1037312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3007646280734005e-05,
+      "loss": 3.9815,
+      "step": 1037824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2999260333223485e-05,
+      "loss": 3.979,
+      "step": 1038336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2990890764516694e-05,
+      "loss": 3.9791,
+      "step": 1038848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.298252119580991e-05,
+      "loss": 3.9779,
+      "step": 1039360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.297413524829939e-05,
+      "loss": 3.9838,
+      "step": 1039872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.296574930078887e-05,
+      "loss": 3.9752,
+      "step": 1040384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.295736335327835e-05,
+      "loss": 3.9903,
+      "step": 1040896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.294897740576783e-05,
+      "loss": 3.9655,
+      "step": 1041408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.294059145825731e-05,
+      "loss": 3.9843,
+      "step": 1041920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.293220551074679e-05,
+      "loss": 3.9818,
+      "step": 1042432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.292381956323627e-05,
+      "loss": 3.9816,
+      "step": 1042944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.291543361572575e-05,
+      "loss": 3.9856,
+      "step": 1043456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.290704766821523e-05,
+      "loss": 3.9747,
+      "step": 1043968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.289866172070471e-05,
+      "loss": 3.9772,
+      "step": 1044480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.289029215199792e-05,
+      "loss": 3.9768,
+      "step": 1044992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.28819062044874e-05,
+      "loss": 3.9741,
+      "step": 1045504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.287352025697688e-05,
+      "loss": 3.9624,
+      "step": 1046016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.286513430946636e-05,
+      "loss": 3.9706,
+      "step": 1046528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.285674836195584e-05,
+      "loss": 3.9747,
+      "step": 1047040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2848362414445325e-05,
+      "loss": 3.9764,
+      "step": 1047552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2839976466934805e-05,
+      "loss": 3.97,
+      "step": 1048064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2831590519424285e-05,
+      "loss": 3.9658,
+      "step": 1048576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2823204571913765e-05,
+      "loss": 3.9827,
+      "step": 1049088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.28148513820107e-05,
+      "loss": 3.9783,
+      "step": 1049600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.280646543450018e-05,
+      "loss": 3.9758,
+      "step": 1050112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.279807948698966e-05,
+      "loss": 3.9865,
+      "step": 1050624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.278969353947914e-05,
+      "loss": 3.9829,
+      "step": 1051136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.278130759196862e-05,
+      "loss": 3.9846,
+      "step": 1051648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.27729216444581e-05,
+      "loss": 3.9739,
+      "step": 1052160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.276455207575131e-05,
+      "loss": 3.9722,
+      "step": 1052672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.275616612824079e-05,
+      "loss": 3.9742,
+      "step": 1053184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.274778018073028e-05,
+      "loss": 3.9866,
+      "step": 1053696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.273939423321976e-05,
+      "loss": 3.9573,
+      "step": 1054208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.273100828570924e-05,
+      "loss": 3.9767,
+      "step": 1054720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.272262233819871e-05,
+      "loss": 3.9682,
+      "step": 1055232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.271423639068819e-05,
+      "loss": 3.9767,
+      "step": 1055744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.270585044317767e-05,
+      "loss": 3.9821,
+      "step": 1056256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.269748087447088e-05,
+      "loss": 3.9745,
+      "step": 1056768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2689111305764097e-05,
+      "loss": 3.9675,
+      "step": 1057280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2680725358253576e-05,
+      "loss": 3.9672,
+      "step": 1057792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2672339410743056e-05,
+      "loss": 3.9876,
+      "step": 1058304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.266395346323253e-05,
+      "loss": 3.9743,
+      "step": 1058816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2655583894525745e-05,
+      "loss": 3.9849,
+      "step": 1059328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.264719794701523e-05,
+      "loss": 3.967,
+      "step": 1059840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.263881199950471e-05,
+      "loss": 3.9852,
+      "step": 1060352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2630426051994185e-05,
+      "loss": 3.9824,
+      "step": 1060864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2622040104483665e-05,
+      "loss": 3.9783,
+      "step": 1061376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2613654156973145e-05,
+      "loss": 3.9742,
+      "step": 1061888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2605268209462625e-05,
+      "loss": 3.9757,
+      "step": 1062400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2596882261952105e-05,
+      "loss": 3.9802,
+      "step": 1062912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2588512693245314e-05,
+      "loss": 3.9854,
+      "step": 1063424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2580126745734794e-05,
+      "loss": 3.9774,
+      "step": 1063936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2571740798224274e-05,
+      "loss": 3.9691,
+      "step": 1064448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2563354850713754e-05,
+      "loss": 3.9786,
+      "step": 1064960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2554968903203234e-05,
+      "loss": 3.9672,
+      "step": 1065472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.254659933449645e-05,
+      "loss": 3.966,
+      "step": 1065984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.253821338698593e-05,
+      "loss": 3.9831,
+      "step": 1066496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.252982743947541e-05,
+      "loss": 3.9677,
+      "step": 1067008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.252144149196489e-05,
+      "loss": 3.9835,
+      "step": 1067520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.25130719232581e-05,
+      "loss": 3.9748,
+      "step": 1068032
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.031233787536621,
+      "eval_runtime": 293.9518,
+      "eval_samples_per_second": 1298.142,
+      "eval_steps_per_second": 40.568,
+      "step": 1068480
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.250468597574758e-05,
+      "loss": 3.9808,
+      "step": 1068544
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.249630002823706e-05,
+      "loss": 3.9686,
+      "step": 1069056
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.248791408072654e-05,
+      "loss": 3.9704,
+      "step": 1069568
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.247952813321602e-05,
+      "loss": 3.9866,
+      "step": 1070080
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.24711421857055e-05,
+      "loss": 3.9813,
+      "step": 1070592
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.246275623819498e-05,
+      "loss": 3.9776,
+      "step": 1071104
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.245437029068446e-05,
+      "loss": 3.9691,
+      "step": 1071616
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.244598434317394e-05,
+      "loss": 3.9689,
+      "step": 1072128
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.243759839566342e-05,
+      "loss": 3.966,
+      "step": 1072640
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.24292124481529e-05,
+      "loss": 3.9756,
+      "step": 1073152
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.242082650064238e-05,
+      "loss": 3.9776,
+      "step": 1073664
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.241244055313186e-05,
+      "loss": 3.986,
+      "step": 1074176
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2404070984425074e-05,
+      "loss": 3.9905,
+      "step": 1074688
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.239568503691455e-05,
+      "loss": 3.9601,
+      "step": 1075200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.238729908940403e-05,
+      "loss": 3.9701,
+      "step": 1075712
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.237891314189351e-05,
+      "loss": 3.9617,
+      "step": 1076224
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.237052719438299e-05,
+      "loss": 3.9781,
+      "step": 1076736
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2362157625676196e-05,
+      "loss": 3.9705,
+      "step": 1077248
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2353771678165676e-05,
+      "loss": 3.9678,
+      "step": 1077760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2345385730655156e-05,
+      "loss": 3.9682,
+      "step": 1078272
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2336999783144636e-05,
+      "loss": 3.9815,
+      "step": 1078784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2328613835634116e-05,
+      "loss": 3.9728,
+      "step": 1079296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.23202278881236e-05,
+      "loss": 3.9771,
+      "step": 1079808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.231184194061308e-05,
+      "loss": 3.975,
+      "step": 1080320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.230345599310256e-05,
+      "loss": 3.9789,
+      "step": 1080832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.229507004559204e-05,
+      "loss": 3.9726,
+      "step": 1081344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.228668409808152e-05,
+      "loss": 3.9617,
+      "step": 1081856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2278298150571e-05,
+      "loss": 3.9744,
+      "step": 1082368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.226992858186421e-05,
+      "loss": 3.9628,
+      "step": 1082880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.226154263435369e-05,
+      "loss": 3.9668,
+      "step": 1083392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.225315668684317e-05,
+      "loss": 3.9709,
+      "step": 1083904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.224478711813638e-05,
+      "loss": 3.9736,
+      "step": 1084416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.223640117062586e-05,
+      "loss": 3.9694,
+      "step": 1084928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.222801522311534e-05,
+      "loss": 3.9762,
+      "step": 1085440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.221962927560482e-05,
+      "loss": 3.9715,
+      "step": 1085952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2211259706898037e-05,
+      "loss": 3.9713,
+      "step": 1086464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2202873759387517e-05,
+      "loss": 3.9724,
+      "step": 1086976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2194487811876996e-05,
+      "loss": 3.9522,
+      "step": 1087488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2186101864366476e-05,
+      "loss": 3.9668,
+      "step": 1088000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2177715916855956e-05,
+      "loss": 3.9594,
+      "step": 1088512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.216932996934543e-05,
+      "loss": 3.9639,
+      "step": 1089024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.216094402183491e-05,
+      "loss": 3.9657,
+      "step": 1089536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.215255807432439e-05,
+      "loss": 3.9761,
+      "step": 1090048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.214417212681387e-05,
+      "loss": 3.9666,
+      "step": 1090560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.213578617930335e-05,
+      "loss": 3.973,
+      "step": 1091072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.212740023179283e-05,
+      "loss": 3.9709,
+      "step": 1091584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.211901428428231e-05,
+      "loss": 3.9711,
+      "step": 1092096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.211062833677179e-05,
+      "loss": 3.9664,
+      "step": 1092608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2102258768065005e-05,
+      "loss": 3.9545,
+      "step": 1093120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2093889199358214e-05,
+      "loss": 3.9642,
+      "step": 1093632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2085503251847694e-05,
+      "loss": 3.9746,
+      "step": 1094144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2077117304337174e-05,
+      "loss": 3.9758,
+      "step": 1094656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2068731356826654e-05,
+      "loss": 3.9668,
+      "step": 1095168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.206036178811986e-05,
+      "loss": 3.9574,
+      "step": 1095680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.205197584060934e-05,
+      "loss": 3.9644,
+      "step": 1096192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.204358989309882e-05,
+      "loss": 3.9518,
+      "step": 1096704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.20352039455883e-05,
+      "loss": 3.9833,
+      "step": 1097216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.202681799807778e-05,
+      "loss": 3.9536,
+      "step": 1097728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.201843205056726e-05,
+      "loss": 3.9778,
+      "step": 1098240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.201006248186048e-05,
+      "loss": 3.9652,
+      "step": 1098752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.200167653434996e-05,
+      "loss": 3.9542,
+      "step": 1099264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.199329058683944e-05,
+      "loss": 3.9649,
+      "step": 1099776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.198490463932892e-05,
+      "loss": 3.9646,
+      "step": 1100288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.19765186918184e-05,
+      "loss": 3.9527,
+      "step": 1100800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.196814912311161e-05,
+      "loss": 3.9525,
+      "step": 1101312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.195976317560109e-05,
+      "loss": 3.9766,
+      "step": 1101824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.195137722809057e-05,
+      "loss": 3.9566,
+      "step": 1102336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.194299128058005e-05,
+      "loss": 3.9611,
+      "step": 1102848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.193460533306953e-05,
+      "loss": 3.9497,
+      "step": 1103360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.192621938555901e-05,
+      "loss": 3.9613,
+      "step": 1103872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.191784981685222e-05,
+      "loss": 3.9734,
+      "step": 1104384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1909480248145426e-05,
+      "loss": 3.9675,
+      "step": 1104896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.190111067943864e-05,
+      "loss": 3.9648,
+      "step": 1105408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.189272473192812e-05,
+      "loss": 3.9662,
+      "step": 1105920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.18843387844176e-05,
+      "loss": 3.9734,
+      "step": 1106432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.187595283690708e-05,
+      "loss": 3.9636,
+      "step": 1106944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.186756688939656e-05,
+      "loss": 3.9636,
+      "step": 1107456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.185918094188604e-05,
+      "loss": 3.9648,
+      "step": 1107968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.185079499437552e-05,
+      "loss": 3.965,
+      "step": 1108480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1842409046865e-05,
+      "loss": 3.965,
+      "step": 1108992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.183402309935448e-05,
+      "loss": 3.9637,
+      "step": 1109504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.182563715184396e-05,
+      "loss": 3.9683,
+      "step": 1110016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.181725120433344e-05,
+      "loss": 3.9767,
+      "step": 1110528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1808865256822914e-05,
+      "loss": 3.9686,
+      "step": 1111040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1800479309312394e-05,
+      "loss": 3.9536,
+      "step": 1111552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.179210974060561e-05,
+      "loss": 3.9602,
+      "step": 1112064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.178372379309509e-05,
+      "loss": 3.9699,
+      "step": 1112576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1775354224388306e-05,
+      "loss": 3.9635,
+      "step": 1113088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1766968276877786e-05,
+      "loss": 3.9657,
+      "step": 1113600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1758582329367266e-05,
+      "loss": 3.9699,
+      "step": 1114112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.175019638185674e-05,
+      "loss": 3.968,
+      "step": 1114624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.174181043434622e-05,
+      "loss": 3.9653,
+      "step": 1115136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.17334244868357e-05,
+      "loss": 3.9625,
+      "step": 1115648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.172503853932518e-05,
+      "loss": 3.972,
+      "step": 1116160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.171665259181466e-05,
+      "loss": 3.963,
+      "step": 1116672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.170826664430414e-05,
+      "loss": 3.977,
+      "step": 1117184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.169988069679362e-05,
+      "loss": 3.9516,
+      "step": 1117696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.16914947492831e-05,
+      "loss": 3.97,
+      "step": 1118208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.168310880177258e-05,
+      "loss": 3.9704,
+      "step": 1118720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1674739233065795e-05,
+      "loss": 3.9695,
+      "step": 1119232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1666353285555275e-05,
+      "loss": 3.9718,
+      "step": 1119744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1657983716848484e-05,
+      "loss": 3.9652,
+      "step": 1120256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1649597769337964e-05,
+      "loss": 3.9639,
+      "step": 1120768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1641211821827444e-05,
+      "loss": 3.9671,
+      "step": 1121280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1632825874316924e-05,
+      "loss": 3.9576,
+      "step": 1121792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.162445630561013e-05,
+      "loss": 3.9575,
+      "step": 1122304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.161607035809961e-05,
+      "loss": 3.9537,
+      "step": 1122816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.160768441058909e-05,
+      "loss": 3.9633,
+      "step": 1123328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.159929846307857e-05,
+      "loss": 3.9654,
+      "step": 1123840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.159091251556805e-05,
+      "loss": 3.9547,
+      "step": 1124352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.158252656805753e-05,
+      "loss": 3.9497,
+      "step": 1124864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.157414062054702e-05,
+      "loss": 3.9673,
+      "step": 1125376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.15657546730365e-05,
+      "loss": 3.9717,
+      "step": 1125888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.155738510432971e-05,
+      "loss": 3.9684,
+      "step": 1126400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.154899915681919e-05,
+      "loss": 3.9682,
+      "step": 1126912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.154061320930867e-05,
+      "loss": 3.9705,
+      "step": 1127424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.153222726179815e-05,
+      "loss": 3.9695,
+      "step": 1127936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.152385769309136e-05,
+      "loss": 3.9626,
+      "step": 1128448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.151547174558084e-05,
+      "loss": 3.9604,
+      "step": 1128960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.150708579807032e-05,
+      "loss": 3.9629,
+      "step": 1129472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.14986998505598e-05,
+      "loss": 3.9729,
+      "step": 1129984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1490330281853006e-05,
+      "loss": 3.9429,
+      "step": 1130496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1481944334342486e-05,
+      "loss": 3.9664,
+      "step": 1131008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.147355838683197e-05,
+      "loss": 3.9551,
+      "step": 1131520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.146517243932145e-05,
+      "loss": 3.9636,
+      "step": 1132032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1456786491810926e-05,
+      "loss": 3.9651,
+      "step": 1132544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1448400544300406e-05,
+      "loss": 3.9647,
+      "step": 1133056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1440014596789886e-05,
+      "loss": 3.9521,
+      "step": 1133568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1431628649279366e-05,
+      "loss": 3.9566,
+      "step": 1134080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1423242701768846e-05,
+      "loss": 3.9694,
+      "step": 1134592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1414856754258326e-05,
+      "loss": 3.9648,
+      "step": 1135104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1406487185551535e-05,
+      "loss": 3.9693,
+      "step": 1135616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1398101238041015e-05,
+      "loss": 3.9591,
+      "step": 1136128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1389731669334224e-05,
+      "loss": 3.9703,
+      "step": 1136640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.138134572182371e-05,
+      "loss": 3.9705,
+      "step": 1137152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.137295977431319e-05,
+      "loss": 3.965,
+      "step": 1137664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.13645902056064e-05,
+      "loss": 3.9633,
+      "step": 1138176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.135620425809588e-05,
+      "loss": 3.9626,
+      "step": 1138688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.134781831058536e-05,
+      "loss": 3.971,
+      "step": 1139200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.133943236307484e-05,
+      "loss": 3.9727,
+      "step": 1139712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.133104641556432e-05,
+      "loss": 3.9615,
+      "step": 1140224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.13226604680538e-05,
+      "loss": 3.9543,
+      "step": 1140736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.131427452054328e-05,
+      "loss": 3.9667,
+      "step": 1141248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.130588857303276e-05,
+      "loss": 3.9553,
+      "step": 1141760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.129751900432597e-05,
+      "loss": 3.9493,
+      "step": 1142272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.128913305681545e-05,
+      "loss": 3.9733,
+      "step": 1142784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.128076348810866e-05,
+      "loss": 3.9576,
+      "step": 1143296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1272377540598144e-05,
+      "loss": 3.9708,
+      "step": 1143808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1263991593087624e-05,
+      "loss": 3.961,
+      "step": 1144320
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.024241924285889,
+      "eval_runtime": 292.5692,
+      "eval_samples_per_second": 1304.276,
+      "eval_steps_per_second": 40.76,
+      "step": 1144800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1255605645577104e-05,
+      "loss": 3.9515,
+      "step": 1144832
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1247219698066584e-05,
+      "loss": 3.9588,
+      "step": 1145344
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1238833750556064e-05,
+      "loss": 3.958,
+      "step": 1145856
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1230447803045544e-05,
+      "loss": 3.9695,
+      "step": 1146368
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1222061855535024e-05,
+      "loss": 3.969,
+      "step": 1146880
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.121369228682823e-05,
+      "loss": 3.9654,
+      "step": 1147392
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.120530633931771e-05,
+      "loss": 3.9582,
+      "step": 1147904
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.119692039180719e-05,
+      "loss": 3.9535,
+      "step": 1148416
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.11885508231004e-05,
+      "loss": 3.9543,
+      "step": 1148928
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.118016487558988e-05,
+      "loss": 3.9644,
+      "step": 1149440
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.117177892807936e-05,
+      "loss": 3.9599,
+      "step": 1149952
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.116339298056884e-05,
+      "loss": 3.9784,
+      "step": 1150464
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.115502341186206e-05,
+      "loss": 3.9757,
+      "step": 1150976
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.114663746435154e-05,
+      "loss": 3.9493,
+      "step": 1151488
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.113825151684102e-05,
+      "loss": 3.9537,
+      "step": 1152000
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.11298655693305e-05,
+      "loss": 3.9486,
+      "step": 1152512
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.112147962181998e-05,
+      "loss": 3.9631,
+      "step": 1153024
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.111309367430946e-05,
+      "loss": 3.96,
+      "step": 1153536
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.110470772679894e-05,
+      "loss": 3.9545,
+      "step": 1154048
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.109632177928841e-05,
+      "loss": 3.9507,
+      "step": 1154560
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.108793583177789e-05,
+      "loss": 3.9736,
+      "step": 1155072
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.107954988426737e-05,
+      "loss": 3.9561,
+      "step": 1155584
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.107118031556058e-05,
+      "loss": 3.9648,
+      "step": 1156096
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1062794368050066e-05,
+      "loss": 3.9643,
+      "step": 1156608
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1054408420539546e-05,
+      "loss": 3.9708,
+      "step": 1157120
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1046022473029026e-05,
+      "loss": 3.9538,
+      "step": 1157632
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1037636525518506e-05,
+      "loss": 3.9524,
+      "step": 1158144
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1029250578007986e-05,
+      "loss": 3.9566,
+      "step": 1158656
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1020864630497466e-05,
+      "loss": 3.9606,
+      "step": 1159168
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1012478682986946e-05,
+      "loss": 3.9526,
+      "step": 1159680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.1004109114280155e-05,
+      "loss": 3.9552,
+      "step": 1160192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0995723166769635e-05,
+      "loss": 3.9604,
+      "step": 1160704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0987337219259115e-05,
+      "loss": 3.9582,
+      "step": 1161216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0978951271748595e-05,
+      "loss": 3.9629,
+      "step": 1161728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0970565324238075e-05,
+      "loss": 3.9583,
+      "step": 1162240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.096221213433502e-05,
+      "loss": 3.96,
+      "step": 1162752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.09538261868245e-05,
+      "loss": 3.9557,
+      "step": 1163264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.094544023931398e-05,
+      "loss": 3.9423,
+      "step": 1163776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.093705429180346e-05,
+      "loss": 3.955,
+      "step": 1164288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.092868472309667e-05,
+      "loss": 3.9441,
+      "step": 1164800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.092029877558615e-05,
+      "loss": 3.9546,
+      "step": 1165312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.091191282807563e-05,
+      "loss": 3.9531,
+      "step": 1165824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.090352688056511e-05,
+      "loss": 3.9659,
+      "step": 1166336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.089514093305459e-05,
+      "loss": 3.9547,
+      "step": 1166848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.088675498554407e-05,
+      "loss": 3.9589,
+      "step": 1167360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.087836903803355e-05,
+      "loss": 3.9623,
+      "step": 1167872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.086998309052303e-05,
+      "loss": 3.9568,
+      "step": 1168384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.086159714301251e-05,
+      "loss": 3.9544,
+      "step": 1168896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.085322757430572e-05,
+      "loss": 3.9414,
+      "step": 1169408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.08448416267952e-05,
+      "loss": 3.9539,
+      "step": 1169920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0836455679284685e-05,
+      "loss": 3.9589,
+      "step": 1170432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0828086110577894e-05,
+      "loss": 3.9599,
+      "step": 1170944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0819700163067374e-05,
+      "loss": 3.9583,
+      "step": 1171456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.081133059436058e-05,
+      "loss": 3.9439,
+      "step": 1171968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.080294464685006e-05,
+      "loss": 3.9545,
+      "step": 1172480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.079455869933954e-05,
+      "loss": 3.9399,
+      "step": 1172992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.078617275182902e-05,
+      "loss": 3.9701,
+      "step": 1173504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.07777868043185e-05,
+      "loss": 3.9425,
+      "step": 1174016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.076940085680798e-05,
+      "loss": 3.9709,
+      "step": 1174528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.076101490929746e-05,
+      "loss": 3.9526,
+      "step": 1175040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.075262896178694e-05,
+      "loss": 3.9389,
+      "step": 1175552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.074424301427642e-05,
+      "loss": 3.9539,
+      "step": 1176064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.07358570667659e-05,
+      "loss": 3.9545,
+      "step": 1176576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.072747111925538e-05,
+      "loss": 3.939,
+      "step": 1177088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.071908517174486e-05,
+      "loss": 3.9402,
+      "step": 1177600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.071069922423434e-05,
+      "loss": 3.9637,
+      "step": 1178112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.070231327672382e-05,
+      "loss": 3.9464,
+      "step": 1178624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.069394370801703e-05,
+      "loss": 3.9459,
+      "step": 1179136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.068555776050651e-05,
+      "loss": 3.9385,
+      "step": 1179648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.067717181299599e-05,
+      "loss": 3.9488,
+      "step": 1180160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.06688022442892e-05,
+      "loss": 3.9618,
+      "step": 1180672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0660432675582416e-05,
+      "loss": 3.9576,
+      "step": 1181184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.065204672807189e-05,
+      "loss": 3.9501,
+      "step": 1181696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0643660780561376e-05,
+      "loss": 3.9554,
+      "step": 1182208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0635274833050856e-05,
+      "loss": 3.9589,
+      "step": 1182720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0626888885540336e-05,
+      "loss": 3.95,
+      "step": 1183232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0618502938029816e-05,
+      "loss": 3.9534,
+      "step": 1183744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0610116990519296e-05,
+      "loss": 3.9555,
+      "step": 1184256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0601731043008776e-05,
+      "loss": 3.9516,
+      "step": 1184768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0593345095498256e-05,
+      "loss": 3.9534,
+      "step": 1185280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0584975526791465e-05,
+      "loss": 3.9526,
+      "step": 1185792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0576605958084674e-05,
+      "loss": 3.9552,
+      "step": 1186304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0568220010574154e-05,
+      "loss": 3.9675,
+      "step": 1186816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0559834063063634e-05,
+      "loss": 3.9549,
+      "step": 1187328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0551448115553114e-05,
+      "loss": 3.9418,
+      "step": 1187840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0543062168042594e-05,
+      "loss": 3.9465,
+      "step": 1188352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0534676220532074e-05,
+      "loss": 3.9572,
+      "step": 1188864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.052630665182529e-05,
+      "loss": 3.955,
+      "step": 1189376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.051792070431477e-05,
+      "loss": 3.9485,
+      "step": 1189888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.050953475680425e-05,
+      "loss": 3.958,
+      "step": 1190400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.050114880929373e-05,
+      "loss": 3.9584,
+      "step": 1190912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.049277924058694e-05,
+      "loss": 3.951,
+      "step": 1191424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.048439329307642e-05,
+      "loss": 3.9523,
+      "step": 1191936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.04760073455659e-05,
+      "loss": 3.9633,
+      "step": 1192448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.046762139805538e-05,
+      "loss": 3.9459,
+      "step": 1192960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.045923545054486e-05,
+      "loss": 3.9646,
+      "step": 1193472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.045086588183807e-05,
+      "loss": 3.9465,
+      "step": 1193984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.044247993432755e-05,
+      "loss": 3.958,
+      "step": 1194496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.043409398681703e-05,
+      "loss": 3.9606,
+      "step": 1195008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.042570803930651e-05,
+      "loss": 3.9538,
+      "step": 1195520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.041733847059972e-05,
+      "loss": 3.9646,
+      "step": 1196032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.04089525230892e-05,
+      "loss": 3.9507,
+      "step": 1196544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0400566575578683e-05,
+      "loss": 3.9507,
+      "step": 1197056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0392180628068163e-05,
+      "loss": 3.9506,
+      "step": 1197568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0383794680557643e-05,
+      "loss": 3.9507,
+      "step": 1198080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0375408733047123e-05,
+      "loss": 3.9443,
+      "step": 1198592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0367022785536603e-05,
+      "loss": 3.9439,
+      "step": 1199104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0358636838026076e-05,
+      "loss": 3.9562,
+      "step": 1199616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0350267269319292e-05,
+      "loss": 3.9519,
+      "step": 1200128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0341881321808775e-05,
+      "loss": 3.9437,
+      "step": 1200640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0333511753101984e-05,
+      "loss": 3.9377,
+      "step": 1201152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0325125805591464e-05,
+      "loss": 3.9519,
+      "step": 1201664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0316739858080944e-05,
+      "loss": 3.9609,
+      "step": 1202176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0308353910570424e-05,
+      "loss": 3.9524,
+      "step": 1202688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0299967963059904e-05,
+      "loss": 3.9569,
+      "step": 1203200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.029158201554938e-05,
+      "loss": 3.9553,
+      "step": 1203712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.028319606803886e-05,
+      "loss": 3.9666,
+      "step": 1204224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.027481012052834e-05,
+      "loss": 3.9489,
+      "step": 1204736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0266456930625286e-05,
+      "loss": 3.95,
+      "step": 1205248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0258070983114766e-05,
+      "loss": 3.9541,
+      "step": 1205760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0249685035604246e-05,
+      "loss": 3.9576,
+      "step": 1206272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.024129908809373e-05,
+      "loss": 3.9359,
+      "step": 1206784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0232913140583202e-05,
+      "loss": 3.9489,
+      "step": 1207296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0224527193072682e-05,
+      "loss": 3.9458,
+      "step": 1207808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0216157624365898e-05,
+      "loss": 3.9478,
+      "step": 1208320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0207771676855378e-05,
+      "loss": 3.951,
+      "step": 1208832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0199385729344855e-05,
+      "loss": 3.9603,
+      "step": 1209344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0190999781834334e-05,
+      "loss": 3.942,
+      "step": 1209856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0182613834323814e-05,
+      "loss": 3.9475,
+      "step": 1210368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0174227886813294e-05,
+      "loss": 3.9505,
+      "step": 1210880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0165841939302774e-05,
+      "loss": 3.9594,
+      "step": 1211392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0157455991792254e-05,
+      "loss": 3.9559,
+      "step": 1211904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0149086423085467e-05,
+      "loss": 3.9485,
+      "step": 1212416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0140700475574947e-05,
+      "loss": 3.957,
+      "step": 1212928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0132314528064427e-05,
+      "loss": 3.9609,
+      "step": 1213440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0123928580553907e-05,
+      "loss": 3.9569,
+      "step": 1213952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0115559011847116e-05,
+      "loss": 3.9497,
+      "step": 1214464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0107173064336596e-05,
+      "loss": 3.9517,
+      "step": 1214976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0098787116826076e-05,
+      "loss": 3.9611,
+      "step": 1215488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.009040116931556e-05,
+      "loss": 3.957,
+      "step": 1216000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.008201522180504e-05,
+      "loss": 3.9553,
+      "step": 1216512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0073645653098248e-05,
+      "loss": 3.944,
+      "step": 1217024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0065276084391457e-05,
+      "loss": 3.9533,
+      "step": 1217536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0056890136880937e-05,
+      "loss": 3.9409,
+      "step": 1218048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.004850418937042e-05,
+      "loss": 3.9396,
+      "step": 1218560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.00401182418599e-05,
+      "loss": 3.9597,
+      "step": 1219072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.003173229434938e-05,
+      "loss": 3.9479,
+      "step": 1219584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.002334634683886e-05,
+      "loss": 3.9555,
+      "step": 1220096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.001496039932834e-05,
+      "loss": 3.9533,
+      "step": 1220608
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 3.000657445181782e-05,
+      "loss": 3.9432,
+      "step": 1221120
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.018515586853027,
+      "eval_runtime": 291.5041,
+      "eval_samples_per_second": 1309.042,
+      "eval_steps_per_second": 40.909,
+      "step": 1221120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.99981885043073e-05,
+      "loss": 3.9488,
+      "step": 1221632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.998980255679678e-05,
+      "loss": 3.9457,
+      "step": 1222144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.998141660928626e-05,
+      "loss": 3.9615,
+      "step": 1222656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9973030661775737e-05,
+      "loss": 3.9558,
+      "step": 1223168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9964644714265217e-05,
+      "loss": 3.9564,
+      "step": 1223680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9956258766754697e-05,
+      "loss": 3.9486,
+      "step": 1224192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9947872819244177e-05,
+      "loss": 3.945,
+      "step": 1224704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9939486871733656e-05,
+      "loss": 3.9392,
+      "step": 1225216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9931100924223136e-05,
+      "loss": 3.9568,
+      "step": 1225728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9922714976712616e-05,
+      "loss": 3.9505,
+      "step": 1226240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.99143290292021e-05,
+      "loss": 3.9652,
+      "step": 1226752
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.990594308169158e-05,
+      "loss": 3.9606,
+      "step": 1227264
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.989757351298479e-05,
+      "loss": 3.9441,
+      "step": 1227776
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.988918756547427e-05,
+      "loss": 3.9434,
+      "step": 1228288
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.988080161796375e-05,
+      "loss": 3.9424,
+      "step": 1228800
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.987241567045323e-05,
+      "loss": 3.9451,
+      "step": 1229312
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.986402972294271e-05,
+      "loss": 3.9501,
+      "step": 1229824
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9855643775432192e-05,
+      "loss": 3.9416,
+      "step": 1230336
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9847257827921672e-05,
+      "loss": 3.9425,
+      "step": 1230848
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9838871880411152e-05,
+      "loss": 3.9662,
+      "step": 1231360
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9830485932900625e-05,
+      "loss": 3.942,
+      "step": 1231872
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9822099985390105e-05,
+      "loss": 3.959,
+      "step": 1232384
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9813714037879585e-05,
+      "loss": 3.9532,
+      "step": 1232896
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.98053444691728e-05,
+      "loss": 3.956,
+      "step": 1233408
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9796974900466013e-05,
+      "loss": 3.942,
+      "step": 1233920
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9788588952955493e-05,
+      "loss": 3.9424,
+      "step": 1234432
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9780203005444973e-05,
+      "loss": 3.9424,
+      "step": 1234944
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9771817057934453e-05,
+      "loss": 3.9491,
+      "step": 1235456
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.976343111042393e-05,
+      "loss": 3.941,
+      "step": 1235968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9755061541717146e-05,
+      "loss": 3.9467,
+      "step": 1236480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9746675594206626e-05,
+      "loss": 3.951,
+      "step": 1236992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.97382896466961e-05,
+      "loss": 3.9485,
+      "step": 1237504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.972990369918558e-05,
+      "loss": 3.9513,
+      "step": 1238016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.972151775167506e-05,
+      "loss": 3.9504,
+      "step": 1238528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9713164561772007e-05,
+      "loss": 3.9488,
+      "step": 1239040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9704778614261487e-05,
+      "loss": 3.9443,
+      "step": 1239552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9696392666750967e-05,
+      "loss": 3.9307,
+      "step": 1240064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9688006719240447e-05,
+      "loss": 3.9481,
+      "step": 1240576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9679620771729927e-05,
+      "loss": 3.9326,
+      "step": 1241088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.96712348242194e-05,
+      "loss": 3.9432,
+      "step": 1241600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9662848876708883e-05,
+      "loss": 3.9425,
+      "step": 1242112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9654462929198363e-05,
+      "loss": 3.9536,
+      "step": 1242624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9646076981687843e-05,
+      "loss": 3.9478,
+      "step": 1243136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9637707412981052e-05,
+      "loss": 3.9433,
+      "step": 1243648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9629321465470532e-05,
+      "loss": 3.953,
+      "step": 1244160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9620935517960012e-05,
+      "loss": 3.9479,
+      "step": 1244672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9612549570449492e-05,
+      "loss": 3.9449,
+      "step": 1245184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9604163622938976e-05,
+      "loss": 3.935,
+      "step": 1245696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9595777675428456e-05,
+      "loss": 3.9387,
+      "step": 1246208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9587408106721665e-05,
+      "loss": 3.9524,
+      "step": 1246720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9579022159211145e-05,
+      "loss": 3.9464,
+      "step": 1247232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9570636211700625e-05,
+      "loss": 3.9457,
+      "step": 1247744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9562250264190105e-05,
+      "loss": 3.9348,
+      "step": 1248256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9553864316679584e-05,
+      "loss": 3.9421,
+      "step": 1248768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9545494747972797e-05,
+      "loss": 3.9289,
+      "step": 1249280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9537108800462277e-05,
+      "loss": 3.9567,
+      "step": 1249792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9528722852951757e-05,
+      "loss": 3.9353,
+      "step": 1250304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9520336905441237e-05,
+      "loss": 3.955,
+      "step": 1250816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9511950957930717e-05,
+      "loss": 3.944,
+      "step": 1251328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9503565010420197e-05,
+      "loss": 3.9318,
+      "step": 1251840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9495179062909677e-05,
+      "loss": 3.9414,
+      "step": 1252352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9486793115399157e-05,
+      "loss": 3.9452,
+      "step": 1252864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.947840716788864e-05,
+      "loss": 3.9268,
+      "step": 1253376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9470021220378113e-05,
+      "loss": 3.9284,
+      "step": 1253888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9461668030475058e-05,
+      "loss": 3.9531,
+      "step": 1254400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9453282082964538e-05,
+      "loss": 3.9407,
+      "step": 1254912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.944489613545402e-05,
+      "loss": 3.9341,
+      "step": 1255424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.94365101879435e-05,
+      "loss": 3.9262,
+      "step": 1255936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.942814061923671e-05,
+      "loss": 3.9398,
+      "step": 1256448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.941975467172619e-05,
+      "loss": 3.9519,
+      "step": 1256960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.941136872421567e-05,
+      "loss": 3.9473,
+      "step": 1257472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.940298277670515e-05,
+      "loss": 3.9383,
+      "step": 1257984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.939459682919463e-05,
+      "loss": 3.9447,
+      "step": 1258496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.938621088168411e-05,
+      "loss": 3.9487,
+      "step": 1259008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9377824934173587e-05,
+      "loss": 3.9406,
+      "step": 1259520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9369455365466803e-05,
+      "loss": 3.9391,
+      "step": 1260032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9361085796760012e-05,
+      "loss": 3.9454,
+      "step": 1260544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9352699849249492e-05,
+      "loss": 3.939,
+      "step": 1261056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9344313901738972e-05,
+      "loss": 3.9448,
+      "step": 1261568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9335927954228455e-05,
+      "loss": 3.9408,
+      "step": 1262080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9327542006717935e-05,
+      "loss": 3.9449,
+      "step": 1262592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9319156059207408e-05,
+      "loss": 3.955,
+      "step": 1263104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9310770111696888e-05,
+      "loss": 3.9462,
+      "step": 1263616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9302384164186368e-05,
+      "loss": 3.9323,
+      "step": 1264128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9293998216675848e-05,
+      "loss": 3.9378,
+      "step": 1264640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.928561226916533e-05,
+      "loss": 3.944,
+      "step": 1265152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.927722632165481e-05,
+      "loss": 3.9477,
+      "step": 1265664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.926884037414429e-05,
+      "loss": 3.9435,
+      "step": 1266176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.92604708054375e-05,
+      "loss": 3.9444,
+      "step": 1266688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.925208485792698e-05,
+      "loss": 3.9465,
+      "step": 1267200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.924369891041646e-05,
+      "loss": 3.9439,
+      "step": 1267712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.923531296290594e-05,
+      "loss": 3.9419,
+      "step": 1268224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9226927015395424e-05,
+      "loss": 3.9454,
+      "step": 1268736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9218541067884904e-05,
+      "loss": 3.9396,
+      "step": 1269248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9210155120374384e-05,
+      "loss": 3.9523,
+      "step": 1269760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9201769172863864e-05,
+      "loss": 3.9418,
+      "step": 1270272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9193399604157073e-05,
+      "loss": 3.9435,
+      "step": 1270784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9185030035450285e-05,
+      "loss": 3.9488,
+      "step": 1271296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9176644087939765e-05,
+      "loss": 3.9457,
+      "step": 1271808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9168258140429245e-05,
+      "loss": 3.951,
+      "step": 1272320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9159872192918725e-05,
+      "loss": 3.9431,
+      "step": 1272832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9151486245408205e-05,
+      "loss": 3.9386,
+      "step": 1273344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9143100297897685e-05,
+      "loss": 3.9409,
+      "step": 1273856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9134714350387165e-05,
+      "loss": 3.9425,
+      "step": 1274368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9126328402876645e-05,
+      "loss": 3.9304,
+      "step": 1274880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9117958834169857e-05,
+      "loss": 3.9285,
+      "step": 1275392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9109572886659337e-05,
+      "loss": 3.9484,
+      "step": 1275904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9101186939148817e-05,
+      "loss": 3.9424,
+      "step": 1276416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9092800991638297e-05,
+      "loss": 3.9312,
+      "step": 1276928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9084431422931506e-05,
+      "loss": 3.9313,
+      "step": 1277440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9076078233028448e-05,
+      "loss": 3.9407,
+      "step": 1277952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9067692285517928e-05,
+      "loss": 3.9501,
+      "step": 1278464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9059306338007408e-05,
+      "loss": 3.9445,
+      "step": 1278976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9050920390496888e-05,
+      "loss": 3.9419,
+      "step": 1279488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9042534442986368e-05,
+      "loss": 3.9477,
+      "step": 1280000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9034148495475848e-05,
+      "loss": 3.953,
+      "step": 1280512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.902576254796533e-05,
+      "loss": 3.9391,
+      "step": 1281024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.901737660045481e-05,
+      "loss": 3.9368,
+      "step": 1281536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.900899065294429e-05,
+      "loss": 3.9457,
+      "step": 1282048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.90006210842375e-05,
+      "loss": 3.9438,
+      "step": 1282560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.899223513672698e-05,
+      "loss": 3.9299,
+      "step": 1283072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.898384918921646e-05,
+      "loss": 3.9344,
+      "step": 1283584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.897546324170594e-05,
+      "loss": 3.9382,
+      "step": 1284096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8967077294195423e-05,
+      "loss": 3.9357,
+      "step": 1284608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8958691346684896e-05,
+      "loss": 3.9413,
+      "step": 1285120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8950305399174376e-05,
+      "loss": 3.9494,
+      "step": 1285632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8941919451663856e-05,
+      "loss": 3.9344,
+      "step": 1286144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.89335662617608e-05,
+      "loss": 3.9391,
+      "step": 1286656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8925180314250285e-05,
+      "loss": 3.9336,
+      "step": 1287168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8916794366739765e-05,
+      "loss": 3.9527,
+      "step": 1287680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8908408419229245e-05,
+      "loss": 3.9462,
+      "step": 1288192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8900022471718718e-05,
+      "loss": 3.9383,
+      "step": 1288704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8891636524208198e-05,
+      "loss": 3.9446,
+      "step": 1289216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8883250576697678e-05,
+      "loss": 3.9477,
+      "step": 1289728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.887486462918716e-05,
+      "loss": 3.9488,
+      "step": 1290240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.886649506048037e-05,
+      "loss": 3.9423,
+      "step": 1290752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.885810911296985e-05,
+      "loss": 3.9417,
+      "step": 1291264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.884972316545933e-05,
+      "loss": 3.9499,
+      "step": 1291776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.884133721794881e-05,
+      "loss": 3.9447,
+      "step": 1292288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8832967649242022e-05,
+      "loss": 3.9472,
+      "step": 1292800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8824581701731502e-05,
+      "loss": 3.9351,
+      "step": 1293312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8816195754220982e-05,
+      "loss": 3.9429,
+      "step": 1293824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8807809806710462e-05,
+      "loss": 3.9339,
+      "step": 1294336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.879944023800367e-05,
+      "loss": 3.926,
+      "step": 1294848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.879105429049315e-05,
+      "loss": 3.9526,
+      "step": 1295360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8782684721786364e-05,
+      "loss": 3.9392,
+      "step": 1295872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8774298774275844e-05,
+      "loss": 3.9447,
+      "step": 1296384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8765912826765324e-05,
+      "loss": 3.9436,
+      "step": 1296896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8757526879254804e-05,
+      "loss": 3.9272,
+      "step": 1297408
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.013129234313965,
+      "eval_runtime": 287.7456,
+      "eval_samples_per_second": 1326.14,
+      "eval_steps_per_second": 41.443,
+      "step": 1297440
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8749140931744284e-05,
+      "loss": 3.9382,
+      "step": 1297920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8740754984233764e-05,
+      "loss": 3.9365,
+      "step": 1298432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8732369036723244e-05,
+      "loss": 3.953,
+      "step": 1298944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8723983089212724e-05,
+      "loss": 3.9394,
+      "step": 1299456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8715597141702204e-05,
+      "loss": 3.9482,
+      "step": 1299968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8707211194191687e-05,
+      "loss": 3.9328,
+      "step": 1300480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8698825246681167e-05,
+      "loss": 3.9367,
+      "step": 1300992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8690439299170647e-05,
+      "loss": 3.9338,
+      "step": 1301504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8682069730463856e-05,
+      "loss": 3.9422,
+      "step": 1302016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8673683782953336e-05,
+      "loss": 3.9413,
+      "step": 1302528
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.866531421424655e-05,
+      "loss": 3.9516,
+      "step": 1303040
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8656928266736028e-05,
+      "loss": 3.9506,
+      "step": 1303552
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8648558698029237e-05,
+      "loss": 3.936,
+      "step": 1304064
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8640172750518717e-05,
+      "loss": 3.9372,
+      "step": 1304576
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8631786803008197e-05,
+      "loss": 3.9332,
+      "step": 1305088
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8623400855497677e-05,
+      "loss": 3.9313,
+      "step": 1305600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8615014907987157e-05,
+      "loss": 3.94,
+      "step": 1306112
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.860662896047664e-05,
+      "loss": 3.9305,
+      "step": 1306624
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.859824301296612e-05,
+      "loss": 3.9335,
+      "step": 1307136
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.85898570654556e-05,
+      "loss": 3.9599,
+      "step": 1307648
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.858147111794508e-05,
+      "loss": 3.9362,
+      "step": 1308160
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.857310154923829e-05,
+      "loss": 3.9423,
+      "step": 1308672
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.856471560172777e-05,
+      "loss": 3.9432,
+      "step": 1309184
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.855632965421725e-05,
+      "loss": 3.9454,
+      "step": 1309696
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8547943706706726e-05,
+      "loss": 3.937,
+      "step": 1310208
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8539557759196206e-05,
+      "loss": 3.9363,
+      "step": 1310720
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8531188190489422e-05,
+      "loss": 3.9321,
+      "step": 1311232
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8522802242978902e-05,
+      "loss": 3.9422,
+      "step": 1311744
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.851441629546838e-05,
+      "loss": 3.9245,
+      "step": 1312256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8506030347957858e-05,
+      "loss": 3.9389,
+      "step": 1312768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8497660779251074e-05,
+      "loss": 3.9399,
+      "step": 1313280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8489274831740554e-05,
+      "loss": 3.9376,
+      "step": 1313792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8480888884230027e-05,
+      "loss": 3.9438,
+      "step": 1314304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8472502936719507e-05,
+      "loss": 3.9355,
+      "step": 1314816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8464133368012723e-05,
+      "loss": 3.9366,
+      "step": 1315328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.84557474205022e-05,
+      "loss": 3.9362,
+      "step": 1315840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.844736147299168e-05,
+      "loss": 3.9205,
+      "step": 1316352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.843897552548116e-05,
+      "loss": 3.9346,
+      "step": 1316864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.843058957797064e-05,
+      "loss": 3.9246,
+      "step": 1317376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.842220363046012e-05,
+      "loss": 3.9308,
+      "step": 1317888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.84138176829496e-05,
+      "loss": 3.9364,
+      "step": 1318400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.840543173543908e-05,
+      "loss": 3.9439,
+      "step": 1318912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8397045787928563e-05,
+      "loss": 3.9353,
+      "step": 1319424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8388659840418043e-05,
+      "loss": 3.9335,
+      "step": 1319936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8380273892907523e-05,
+      "loss": 3.945,
+      "step": 1320448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8371887945397003e-05,
+      "loss": 3.9373,
+      "step": 1320960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8363518376690212e-05,
+      "loss": 3.9315,
+      "step": 1321472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.835513242917969e-05,
+      "loss": 3.9293,
+      "step": 1321984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8346779239276633e-05,
+      "loss": 3.9257,
+      "step": 1322496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8338393291766113e-05,
+      "loss": 3.9407,
+      "step": 1323008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8330007344255593e-05,
+      "loss": 3.9369,
+      "step": 1323520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8321621396745073e-05,
+      "loss": 3.9322,
+      "step": 1324032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8313235449234553e-05,
+      "loss": 3.9296,
+      "step": 1324544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8304849501724033e-05,
+      "loss": 3.9345,
+      "step": 1325056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8296463554213516e-05,
+      "loss": 3.9192,
+      "step": 1325568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8288077606702996e-05,
+      "loss": 3.9429,
+      "step": 1326080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8279691659192476e-05,
+      "loss": 3.9273,
+      "step": 1326592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8271305711681956e-05,
+      "loss": 3.9439,
+      "step": 1327104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8262936142975165e-05,
+      "loss": 3.9332,
+      "step": 1327616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8254550195464645e-05,
+      "loss": 3.9242,
+      "step": 1328128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8246164247954125e-05,
+      "loss": 3.9285,
+      "step": 1328640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8237778300443605e-05,
+      "loss": 3.9374,
+      "step": 1329152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8229408731736818e-05,
+      "loss": 3.9173,
+      "step": 1329664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8221022784226298e-05,
+      "loss": 3.921,
+      "step": 1330176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8212636836715778e-05,
+      "loss": 3.9388,
+      "step": 1330688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8204250889205258e-05,
+      "loss": 3.9316,
+      "step": 1331200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8195864941694738e-05,
+      "loss": 3.9229,
+      "step": 1331712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8187478994184214e-05,
+      "loss": 3.9193,
+      "step": 1332224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8179093046673694e-05,
+      "loss": 3.9248,
+      "step": 1332736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8170707099163174e-05,
+      "loss": 3.9404,
+      "step": 1333248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8162337530456383e-05,
+      "loss": 3.9372,
+      "step": 1333760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.81539679617496e-05,
+      "loss": 3.9311,
+      "step": 1334272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.814558201423908e-05,
+      "loss": 3.9354,
+      "step": 1334784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.813719606672856e-05,
+      "loss": 3.9344,
+      "step": 1335296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8128810119218036e-05,
+      "loss": 3.9365,
+      "step": 1335808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.812044055051125e-05,
+      "loss": 3.9249,
+      "step": 1336320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.811207098180446e-05,
+      "loss": 3.9368,
+      "step": 1336832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.810368503429394e-05,
+      "loss": 3.932,
+      "step": 1337344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.809529908678342e-05,
+      "loss": 3.9319,
+      "step": 1337856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8086913139272904e-05,
+      "loss": 3.9361,
+      "step": 1338368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8078527191762384e-05,
+      "loss": 3.9336,
+      "step": 1338880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8070141244251857e-05,
+      "loss": 3.944,
+      "step": 1339392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8061755296741337e-05,
+      "loss": 3.9386,
+      "step": 1339904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8053369349230817e-05,
+      "loss": 3.9177,
+      "step": 1340416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8044983401720297e-05,
+      "loss": 3.9316,
+      "step": 1340928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.803661383301351e-05,
+      "loss": 3.9301,
+      "step": 1341440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.802822788550299e-05,
+      "loss": 3.9385,
+      "step": 1341952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.801984193799247e-05,
+      "loss": 3.9323,
+      "step": 1342464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.801145599048195e-05,
+      "loss": 3.9347,
+      "step": 1342976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.8003086421775158e-05,
+      "loss": 3.9352,
+      "step": 1343488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.799470047426464e-05,
+      "loss": 3.9333,
+      "step": 1344000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.798631452675412e-05,
+      "loss": 3.9346,
+      "step": 1344512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.79779285792436e-05,
+      "loss": 3.9344,
+      "step": 1345024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.796954263173308e-05,
+      "loss": 3.9252,
+      "step": 1345536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.796115668422256e-05,
+      "loss": 3.941,
+      "step": 1346048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.795278711551577e-05,
+      "loss": 3.9392,
+      "step": 1346560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.794440116800525e-05,
+      "loss": 3.9299,
+      "step": 1347072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7936015220494734e-05,
+      "loss": 3.9419,
+      "step": 1347584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7927629272984214e-05,
+      "loss": 3.9377,
+      "step": 1348096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7919243325473694e-05,
+      "loss": 3.9458,
+      "step": 1348608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7910873756766903e-05,
+      "loss": 3.9323,
+      "step": 1349120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7902487809256383e-05,
+      "loss": 3.9266,
+      "step": 1349632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7894101861745863e-05,
+      "loss": 3.9323,
+      "step": 1350144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7885715914235343e-05,
+      "loss": 3.9332,
+      "step": 1350656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7877329966724826e-05,
+      "loss": 3.9154,
+      "step": 1351168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7868944019214306e-05,
+      "loss": 3.9278,
+      "step": 1351680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7860558071703786e-05,
+      "loss": 3.9357,
+      "step": 1352192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7852172124193266e-05,
+      "loss": 3.9326,
+      "step": 1352704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7843802555486475e-05,
+      "loss": 3.9251,
+      "step": 1353216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7835416607975955e-05,
+      "loss": 3.918,
+      "step": 1353728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7827030660465435e-05,
+      "loss": 3.9303,
+      "step": 1354240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7818661091758647e-05,
+      "loss": 3.9404,
+      "step": 1354752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7810275144248127e-05,
+      "loss": 3.9342,
+      "step": 1355264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7801889196737607e-05,
+      "loss": 3.9345,
+      "step": 1355776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7793503249227087e-05,
+      "loss": 3.9341,
+      "step": 1356288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7785117301716567e-05,
+      "loss": 3.95,
+      "step": 1356800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.777674773300978e-05,
+      "loss": 3.927,
+      "step": 1357312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.776836178549926e-05,
+      "loss": 3.9273,
+      "step": 1357824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.775997583798874e-05,
+      "loss": 3.9334,
+      "step": 1358336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.775160626928195e-05,
+      "loss": 3.9379,
+      "step": 1358848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.774322032177143e-05,
+      "loss": 3.9194,
+      "step": 1359360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.773483437426091e-05,
+      "loss": 3.9224,
+      "step": 1359872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.772644842675039e-05,
+      "loss": 3.9304,
+      "step": 1360384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7718062479239872e-05,
+      "loss": 3.9285,
+      "step": 1360896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7709676531729345e-05,
+      "loss": 3.9333,
+      "step": 1361408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7701290584218825e-05,
+      "loss": 3.9378,
+      "step": 1361920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.769292101551204e-05,
+      "loss": 3.9217,
+      "step": 1362432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.768453506800152e-05,
+      "loss": 3.9322,
+      "step": 1362944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7676149120490997e-05,
+      "loss": 3.9205,
+      "step": 1363456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7667763172980477e-05,
+      "loss": 3.9445,
+      "step": 1363968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7659393604273693e-05,
+      "loss": 3.9402,
+      "step": 1364480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7651007656763166e-05,
+      "loss": 3.928,
+      "step": 1364992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7642621709252646e-05,
+      "loss": 3.9344,
+      "step": 1365504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7634235761742126e-05,
+      "loss": 3.9334,
+      "step": 1366016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.762584981423161e-05,
+      "loss": 3.9405,
+      "step": 1366528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.761746386672109e-05,
+      "loss": 3.9367,
+      "step": 1367040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.760907791921057e-05,
+      "loss": 3.9335,
+      "step": 1367552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.760069197170005e-05,
+      "loss": 3.9359,
+      "step": 1368064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.759230602418953e-05,
+      "loss": 3.9328,
+      "step": 1368576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.758392007667901e-05,
+      "loss": 3.9339,
+      "step": 1369088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.757553412916849e-05,
+      "loss": 3.9264,
+      "step": 1369600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.756714818165797e-05,
+      "loss": 3.9342,
+      "step": 1370112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7558778612951182e-05,
+      "loss": 3.9272,
+      "step": 1370624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7550392665440662e-05,
+      "loss": 3.9174,
+      "step": 1371136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.754202309673387e-05,
+      "loss": 3.9419,
+      "step": 1371648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.753363714922335e-05,
+      "loss": 3.93,
+      "step": 1372160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.752525120171283e-05,
+      "loss": 3.9349,
+      "step": 1372672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.751686525420231e-05,
+      "loss": 3.9341,
+      "step": 1373184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7508495685495523e-05,
+      "loss": 3.9219,
+      "step": 1373696
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.008572578430176,
+      "eval_runtime": 289.1556,
+      "eval_samples_per_second": 1319.674,
+      "eval_steps_per_second": 41.241,
+      "step": 1373760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7500109737985003e-05,
+      "loss": 3.9303,
+      "step": 1374208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7491723790474483e-05,
+      "loss": 3.9225,
+      "step": 1374720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7483337842963963e-05,
+      "loss": 3.9432,
+      "step": 1375232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7474951895453443e-05,
+      "loss": 3.9269,
+      "step": 1375744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7466565947942923e-05,
+      "loss": 3.9418,
+      "step": 1376256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7458180000432403e-05,
+      "loss": 3.9257,
+      "step": 1376768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.744979405292188e-05,
+      "loss": 3.9249,
+      "step": 1377280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.744140810541136e-05,
+      "loss": 3.9213,
+      "step": 1377792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.743302215790084e-05,
+      "loss": 3.9354,
+      "step": 1378304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.742463621039032e-05,
+      "loss": 3.9308,
+      "step": 1378816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.74162502628798e-05,
+      "loss": 3.9393,
+      "step": 1379328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.740786431536928e-05,
+      "loss": 3.9402,
+      "step": 1379840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7399494746662492e-05,
+      "loss": 3.9267,
+      "step": 1380352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7391108799151972e-05,
+      "loss": 3.9275,
+      "step": 1380864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.738272285164145e-05,
+      "loss": 3.9241,
+      "step": 1381376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.737433690413093e-05,
+      "loss": 3.9229,
+      "step": 1381888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.736595095662041e-05,
+      "loss": 3.9289,
+      "step": 1382400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.735756500910989e-05,
+      "loss": 3.9228,
+      "step": 1382912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.734917906159937e-05,
+      "loss": 3.9234,
+      "step": 1383424
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.734079311408885e-05,
+      "loss": 3.9427,
+      "step": 1383936
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7332407166578335e-05,
+      "loss": 3.9286,
+      "step": 1384448
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7324021219067815e-05,
+      "loss": 3.9315,
+      "step": 1384960
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7315635271557295e-05,
+      "loss": 3.9335,
+      "step": 1385472
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7307265702850504e-05,
+      "loss": 3.9361,
+      "step": 1385984
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7298896134143713e-05,
+      "loss": 3.9266,
+      "step": 1386496
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7290510186633196e-05,
+      "loss": 3.9273,
+      "step": 1387008
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7282124239122676e-05,
+      "loss": 3.9253,
+      "step": 1387520
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7273738291612156e-05,
+      "loss": 3.9285,
+      "step": 1388032
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7265352344101636e-05,
+      "loss": 3.9177,
+      "step": 1388544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7256966396591116e-05,
+      "loss": 3.9294,
+      "step": 1389056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7248580449080596e-05,
+      "loss": 3.9278,
+      "step": 1389568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7240210880373805e-05,
+      "loss": 3.9283,
+      "step": 1390080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.723182493286329e-05,
+      "loss": 3.9348,
+      "step": 1390592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7223455364156498e-05,
+      "loss": 3.9267,
+      "step": 1391104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7215085795449707e-05,
+      "loss": 3.927,
+      "step": 1391616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7206699847939187e-05,
+      "loss": 3.926,
+      "step": 1392128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7198313900428667e-05,
+      "loss": 3.916,
+      "step": 1392640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.718992795291815e-05,
+      "loss": 3.9191,
+      "step": 1393152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.718154200540763e-05,
+      "loss": 3.9126,
+      "step": 1393664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.717315605789711e-05,
+      "loss": 3.9191,
+      "step": 1394176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.716477011038659e-05,
+      "loss": 3.9281,
+      "step": 1394688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.715638416287607e-05,
+      "loss": 3.9322,
+      "step": 1395200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7147998215365543e-05,
+      "loss": 3.9248,
+      "step": 1395712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7139612267855026e-05,
+      "loss": 3.9234,
+      "step": 1396224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7131226320344506e-05,
+      "loss": 3.9372,
+      "step": 1396736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7122840372833986e-05,
+      "loss": 3.9235,
+      "step": 1397248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7114454425323466e-05,
+      "loss": 3.925,
+      "step": 1397760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7106084856616675e-05,
+      "loss": 3.9226,
+      "step": 1398272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7097698909106155e-05,
+      "loss": 3.9112,
+      "step": 1398784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7089312961595635e-05,
+      "loss": 3.9326,
+      "step": 1399296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7080927014085115e-05,
+      "loss": 3.9251,
+      "step": 1399808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.70725410665746e-05,
+      "loss": 3.923,
+      "step": 1400320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7064171497867808e-05,
+      "loss": 3.9195,
+      "step": 1400832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7055785550357287e-05,
+      "loss": 3.9254,
+      "step": 1401344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7047399602846767e-05,
+      "loss": 3.91,
+      "step": 1401856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7039013655336247e-05,
+      "loss": 3.9344,
+      "step": 1402368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7030627707825727e-05,
+      "loss": 3.9157,
+      "step": 1402880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7022241760315207e-05,
+      "loss": 3.9353,
+      "step": 1403392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.701385581280469e-05,
+      "loss": 3.9214,
+      "step": 1403904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.700546986529417e-05,
+      "loss": 3.921,
+      "step": 1404416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.699710029658738e-05,
+      "loss": 3.9153,
+      "step": 1404928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.698873072788059e-05,
+      "loss": 3.9285,
+      "step": 1405440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.698034478037007e-05,
+      "loss": 3.9068,
+      "step": 1405952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6971958832859552e-05,
+      "loss": 3.9153,
+      "step": 1406464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6963572885349032e-05,
+      "loss": 3.9271,
+      "step": 1406976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6955186937838512e-05,
+      "loss": 3.9226,
+      "step": 1407488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6946800990327992e-05,
+      "loss": 3.9141,
+      "step": 1408000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6938415042817472e-05,
+      "loss": 3.9088,
+      "step": 1408512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6930029095306952e-05,
+      "loss": 3.912,
+      "step": 1409024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.692164314779643e-05,
+      "loss": 3.9341,
+      "step": 1409536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6913289957893373e-05,
+      "loss": 3.9257,
+      "step": 1410048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6904904010382853e-05,
+      "loss": 3.92,
+      "step": 1410560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6896534441676063e-05,
+      "loss": 3.9287,
+      "step": 1411072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6888148494165542e-05,
+      "loss": 3.9262,
+      "step": 1411584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6879762546655022e-05,
+      "loss": 3.9275,
+      "step": 1412096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6871376599144506e-05,
+      "loss": 3.9118,
+      "step": 1412608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6863007030437715e-05,
+      "loss": 3.9302,
+      "step": 1413120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6854621082927195e-05,
+      "loss": 3.927,
+      "step": 1413632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6846235135416675e-05,
+      "loss": 3.9175,
+      "step": 1414144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6837849187906155e-05,
+      "loss": 3.9306,
+      "step": 1414656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6829463240395635e-05,
+      "loss": 3.9222,
+      "step": 1415168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6821077292885115e-05,
+      "loss": 3.9308,
+      "step": 1415680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6812691345374598e-05,
+      "loss": 3.9304,
+      "step": 1416192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6804305397864078e-05,
+      "loss": 3.9122,
+      "step": 1416704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.679591945035355e-05,
+      "loss": 3.9214,
+      "step": 1417216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.678753350284303e-05,
+      "loss": 3.9199,
+      "step": 1417728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.677914755533251e-05,
+      "loss": 3.9273,
+      "step": 1418240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6770777986625727e-05,
+      "loss": 3.9266,
+      "step": 1418752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6762392039115203e-05,
+      "loss": 3.923,
+      "step": 1419264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6754006091604683e-05,
+      "loss": 3.9263,
+      "step": 1419776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6745620144094163e-05,
+      "loss": 3.9251,
+      "step": 1420288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6737234196583643e-05,
+      "loss": 3.9227,
+      "step": 1420800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6728864627876852e-05,
+      "loss": 3.9241,
+      "step": 1421312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6720478680366336e-05,
+      "loss": 3.9236,
+      "step": 1421824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6712092732855816e-05,
+      "loss": 3.9296,
+      "step": 1422336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6703706785345296e-05,
+      "loss": 3.925,
+      "step": 1422848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6695337216638505e-05,
+      "loss": 3.9205,
+      "step": 1423360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6686951269127985e-05,
+      "loss": 3.9327,
+      "step": 1423872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6678565321617465e-05,
+      "loss": 3.9272,
+      "step": 1424384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6670179374106945e-05,
+      "loss": 3.9385,
+      "step": 1424896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6661809805400157e-05,
+      "loss": 3.9206,
+      "step": 1425408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6653423857889637e-05,
+      "loss": 3.9208,
+      "step": 1425920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6645037910379117e-05,
+      "loss": 3.9235,
+      "step": 1426432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6636651962868597e-05,
+      "loss": 3.9253,
+      "step": 1426944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6628282394161806e-05,
+      "loss": 3.9038,
+      "step": 1427456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.661989644665129e-05,
+      "loss": 3.9175,
+      "step": 1427968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.661151049914077e-05,
+      "loss": 3.9261,
+      "step": 1428480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.660312455163025e-05,
+      "loss": 3.9192,
+      "step": 1428992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.659473860411973e-05,
+      "loss": 3.9204,
+      "step": 1429504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.658635265660921e-05,
+      "loss": 3.9113,
+      "step": 1430016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.657796670909869e-05,
+      "loss": 3.9184,
+      "step": 1430528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.65695971403919e-05,
+      "loss": 3.9335,
+      "step": 1431040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.656121119288138e-05,
+      "loss": 3.9202,
+      "step": 1431552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.655282524537086e-05,
+      "loss": 3.9313,
+      "step": 1432064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.654443929786034e-05,
+      "loss": 3.9239,
+      "step": 1432576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.653606972915355e-05,
+      "loss": 3.9392,
+      "step": 1433088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.652768378164303e-05,
+      "loss": 3.922,
+      "step": 1433600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6519314212936243e-05,
+      "loss": 3.9206,
+      "step": 1434112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6510928265425723e-05,
+      "loss": 3.9229,
+      "step": 1434624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6502542317915203e-05,
+      "loss": 3.9271,
+      "step": 1435136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6494172749208412e-05,
+      "loss": 3.912,
+      "step": 1435648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6485786801697892e-05,
+      "loss": 3.9128,
+      "step": 1436160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6477400854187372e-05,
+      "loss": 3.9185,
+      "step": 1436672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6469014906676852e-05,
+      "loss": 3.9234,
+      "step": 1437184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6460628959166335e-05,
+      "loss": 3.9238,
+      "step": 1437696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6452243011655815e-05,
+      "loss": 3.9313,
+      "step": 1438208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6443857064145295e-05,
+      "loss": 3.9116,
+      "step": 1438720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6435471116634775e-05,
+      "loss": 3.9197,
+      "step": 1439232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6427085169124255e-05,
+      "loss": 3.9128,
+      "step": 1439744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6418699221613735e-05,
+      "loss": 3.9361,
+      "step": 1440256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6410313274103208e-05,
+      "loss": 3.9341,
+      "step": 1440768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.640192732659269e-05,
+      "loss": 3.9201,
+      "step": 1441280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6393557757885908e-05,
+      "loss": 3.9219,
+      "step": 1441792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6385171810375387e-05,
+      "loss": 3.9244,
+      "step": 1442304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.637678586286486e-05,
+      "loss": 3.9284,
+      "step": 1442816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.636839991535434e-05,
+      "loss": 3.9321,
+      "step": 1443328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.636001396784382e-05,
+      "loss": 3.9206,
+      "step": 1443840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6351644399137033e-05,
+      "loss": 3.9261,
+      "step": 1444352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6343258451626513e-05,
+      "loss": 3.9227,
+      "step": 1444864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6334872504115993e-05,
+      "loss": 3.9303,
+      "step": 1445376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6326486556605473e-05,
+      "loss": 3.9193,
+      "step": 1445888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6318100609094953e-05,
+      "loss": 3.9238,
+      "step": 1446400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6309714661584433e-05,
+      "loss": 3.9209,
+      "step": 1446912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6301345092877645e-05,
+      "loss": 3.9057,
+      "step": 1447424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6292959145367125e-05,
+      "loss": 3.9332,
+      "step": 1447936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6284573197856605e-05,
+      "loss": 3.9206,
+      "step": 1448448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6276203629149814e-05,
+      "loss": 3.9241,
+      "step": 1448960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6267817681639294e-05,
+      "loss": 3.9268,
+      "step": 1449472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6259431734128774e-05,
+      "loss": 3.9129,
+      "step": 1449984
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.004698276519775,
+      "eval_runtime": 295.3733,
+      "eval_samples_per_second": 1291.894,
+      "eval_steps_per_second": 40.373,
+      "step": 1450080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6251045786618254e-05,
+      "loss": 3.9214,
+      "step": 1450496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6242659839107738e-05,
+      "loss": 3.9128,
+      "step": 1451008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6234273891597217e-05,
+      "loss": 3.9342,
+      "step": 1451520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6225887944086697e-05,
+      "loss": 3.9171,
+      "step": 1452032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6217501996576177e-05,
+      "loss": 3.9407,
+      "step": 1452544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6209132427869386e-05,
+      "loss": 3.9125,
+      "step": 1453056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6200746480358866e-05,
+      "loss": 3.92,
+      "step": 1453568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6192360532848346e-05,
+      "loss": 3.9085,
+      "step": 1454080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.618397458533783e-05,
+      "loss": 3.9279,
+      "step": 1454592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.617558863782731e-05,
+      "loss": 3.9239,
+      "step": 1455104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.616720269031679e-05,
+      "loss": 3.9258,
+      "step": 1455616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.615881674280627e-05,
+      "loss": 3.9383,
+      "step": 1456128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.615044717409948e-05,
+      "loss": 3.9148,
+      "step": 1456640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.614206122658896e-05,
+      "loss": 3.9209,
+      "step": 1457152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.613367527907844e-05,
+      "loss": 3.9139,
+      "step": 1457664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6125289331567922e-05,
+      "loss": 3.9163,
+      "step": 1458176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6116903384057395e-05,
+      "loss": 3.9189,
+      "step": 1458688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6108517436546875e-05,
+      "loss": 3.9149,
+      "step": 1459200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6100131489036355e-05,
+      "loss": 3.9159,
+      "step": 1459712
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.609176192032957e-05,
+      "loss": 3.931,
+      "step": 1460224
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6083375972819047e-05,
+      "loss": 3.9227,
+      "step": 1460736
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6074990025308527e-05,
+      "loss": 3.923,
+      "step": 1461248
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6066620456601743e-05,
+      "loss": 3.9276,
+      "step": 1461760
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6058234509091223e-05,
+      "loss": 3.9271,
+      "step": 1462272
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6049848561580696e-05,
+      "loss": 3.9173,
+      "step": 1462784
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6041462614070176e-05,
+      "loss": 3.919,
+      "step": 1463296
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.603307666655966e-05,
+      "loss": 3.9179,
+      "step": 1463808
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.602469071904914e-05,
+      "loss": 3.9153,
+      "step": 1464320
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.601630477153862e-05,
+      "loss": 3.913,
+      "step": 1464832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.60079188240281e-05,
+      "loss": 3.9148,
+      "step": 1465344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.599953287651758e-05,
+      "loss": 3.9244,
+      "step": 1465856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.599116330781079e-05,
+      "loss": 3.919,
+      "step": 1466368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.598277736030027e-05,
+      "loss": 3.927,
+      "step": 1466880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5974391412789752e-05,
+      "loss": 3.9197,
+      "step": 1467392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.596602184408296e-05,
+      "loss": 3.919,
+      "step": 1467904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.595763589657244e-05,
+      "loss": 3.916,
+      "step": 1468416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.594924994906192e-05,
+      "loss": 3.9107,
+      "step": 1468928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.59408640015514e-05,
+      "loss": 3.9086,
+      "step": 1469440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.593247805404088e-05,
+      "loss": 3.9052,
+      "step": 1469952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.592409210653036e-05,
+      "loss": 3.9109,
+      "step": 1470464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.591570615901984e-05,
+      "loss": 3.9212,
+      "step": 1470976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5907320211509324e-05,
+      "loss": 3.9214,
+      "step": 1471488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5898950642802533e-05,
+      "loss": 3.9188,
+      "step": 1472000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5890564695292013e-05,
+      "loss": 3.918,
+      "step": 1472512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5882178747781493e-05,
+      "loss": 3.9312,
+      "step": 1473024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5873792800270973e-05,
+      "loss": 3.9143,
+      "step": 1473536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5865423231564186e-05,
+      "loss": 3.9161,
+      "step": 1474048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5857037284053666e-05,
+      "loss": 3.9124,
+      "step": 1474560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5848651336543146e-05,
+      "loss": 3.9048,
+      "step": 1475072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5840265389032625e-05,
+      "loss": 3.928,
+      "step": 1475584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5831879441522105e-05,
+      "loss": 3.9149,
+      "step": 1476096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.582349349401158e-05,
+      "loss": 3.9155,
+      "step": 1476608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5815107546501062e-05,
+      "loss": 3.9112,
+      "step": 1477120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5806721598990542e-05,
+      "loss": 3.9226,
+      "step": 1477632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5798352030283758e-05,
+      "loss": 3.8996,
+      "step": 1478144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.578996608277323e-05,
+      "loss": 3.9228,
+      "step": 1478656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.578158013526271e-05,
+      "loss": 3.91,
+      "step": 1479168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5773210566555927e-05,
+      "loss": 3.9234,
+      "step": 1479680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5764824619045407e-05,
+      "loss": 3.9184,
+      "step": 1480192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5756438671534883e-05,
+      "loss": 3.909,
+      "step": 1480704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5748052724024363e-05,
+      "loss": 3.9046,
+      "step": 1481216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5739666776513843e-05,
+      "loss": 3.9255,
+      "step": 1481728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5731280829003323e-05,
+      "loss": 3.8996,
+      "step": 1482240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5722894881492803e-05,
+      "loss": 3.9017,
+      "step": 1482752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5714508933982283e-05,
+      "loss": 3.9229,
+      "step": 1483264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5706122986471763e-05,
+      "loss": 3.9163,
+      "step": 1483776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5697753417764976e-05,
+      "loss": 3.9037,
+      "step": 1484288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5689367470254455e-05,
+      "loss": 3.9049,
+      "step": 1484800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5680981522743935e-05,
+      "loss": 3.9035,
+      "step": 1485312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5672611954037145e-05,
+      "loss": 3.9244,
+      "step": 1485824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5664226006526624e-05,
+      "loss": 3.9175,
+      "step": 1486336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5655840059016108e-05,
+      "loss": 3.9126,
+      "step": 1486848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5647454111505588e-05,
+      "loss": 3.9192,
+      "step": 1487360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5639068163995068e-05,
+      "loss": 3.9207,
+      "step": 1487872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5630682216484548e-05,
+      "loss": 3.9222,
+      "step": 1488384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5622296268974028e-05,
+      "loss": 3.9033,
+      "step": 1488896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5613910321463508e-05,
+      "loss": 3.9194,
+      "step": 1489408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5605524373952988e-05,
+      "loss": 3.9183,
+      "step": 1489920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.55971548052462e-05,
+      "loss": 3.9079,
+      "step": 1490432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.558876885773568e-05,
+      "loss": 3.9253,
+      "step": 1490944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.558038291022516e-05,
+      "loss": 3.9129,
+      "step": 1491456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.557199696271464e-05,
+      "loss": 3.9229,
+      "step": 1491968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.556361101520412e-05,
+      "loss": 3.9231,
+      "step": 1492480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.555524144649733e-05,
+      "loss": 3.907,
+      "step": 1492992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.554685549898681e-05,
+      "loss": 3.9145,
+      "step": 1493504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5538469551476292e-05,
+      "loss": 3.9104,
+      "step": 1494016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5530083603965772e-05,
+      "loss": 3.9151,
+      "step": 1494528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5521697656455245e-05,
+      "loss": 3.9186,
+      "step": 1495040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.551332808774846e-05,
+      "loss": 3.9117,
+      "step": 1495552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.550494214023794e-05,
+      "loss": 3.9207,
+      "step": 1496064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5496556192727418e-05,
+      "loss": 3.9213,
+      "step": 1496576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5488186624020634e-05,
+      "loss": 3.9123,
+      "step": 1497088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5479800676510114e-05,
+      "loss": 3.9158,
+      "step": 1497600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5471414728999594e-05,
+      "loss": 3.9149,
+      "step": 1498112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5463028781489067e-05,
+      "loss": 3.9256,
+      "step": 1498624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5454642833978547e-05,
+      "loss": 3.9171,
+      "step": 1499136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.544625688646803e-05,
+      "loss": 3.9131,
+      "step": 1499648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.543787093895751e-05,
+      "loss": 3.9209,
+      "step": 1500160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.542948499144699e-05,
+      "loss": 3.9148,
+      "step": 1500672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.54211154227402e-05,
+      "loss": 3.9313,
+      "step": 1501184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.541272947522968e-05,
+      "loss": 3.9147,
+      "step": 1501696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.540434352771916e-05,
+      "loss": 3.9155,
+      "step": 1502208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.539595758020864e-05,
+      "loss": 3.9127,
+      "step": 1502720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.538757163269812e-05,
+      "loss": 3.9189,
+      "step": 1503232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.537920206399133e-05,
+      "loss": 3.8979,
+      "step": 1503744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.537081611648081e-05,
+      "loss": 3.9077,
+      "step": 1504256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.536243016897029e-05,
+      "loss": 3.9194,
+      "step": 1504768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.535404422145977e-05,
+      "loss": 3.9134,
+      "step": 1505280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5345674652752984e-05,
+      "loss": 3.9075,
+      "step": 1505792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5337288705242464e-05,
+      "loss": 3.906,
+      "step": 1506304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5328902757731944e-05,
+      "loss": 3.9087,
+      "step": 1506816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5320516810221424e-05,
+      "loss": 3.928,
+      "step": 1507328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5312147241514633e-05,
+      "loss": 3.9086,
+      "step": 1507840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5303761294004113e-05,
+      "loss": 3.9225,
+      "step": 1508352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5295375346493593e-05,
+      "loss": 3.9158,
+      "step": 1508864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5286989398983073e-05,
+      "loss": 3.9326,
+      "step": 1509376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5278603451472556e-05,
+      "loss": 3.917,
+      "step": 1509888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5270217503962036e-05,
+      "loss": 3.9049,
+      "step": 1510400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5261831556451516e-05,
+      "loss": 3.9166,
+      "step": 1510912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5253445608940996e-05,
+      "loss": 3.9201,
+      "step": 1511424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5245059661430476e-05,
+      "loss": 3.9023,
+      "step": 1511936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5236690092723685e-05,
+      "loss": 3.9043,
+      "step": 1512448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5228304145213165e-05,
+      "loss": 3.9129,
+      "step": 1512960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5219918197702648e-05,
+      "loss": 3.9162,
+      "step": 1513472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5211532250192128e-05,
+      "loss": 3.9125,
+      "step": 1513984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.52031463026816e-05,
+      "loss": 3.9278,
+      "step": 1514496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5194776733974817e-05,
+      "loss": 3.9006,
+      "step": 1515008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5186407165268026e-05,
+      "loss": 3.9125,
+      "step": 1515520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.517802121775751e-05,
+      "loss": 3.9048,
+      "step": 1516032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.516963527024699e-05,
+      "loss": 3.9273,
+      "step": 1516544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.516124932273647e-05,
+      "loss": 3.9261,
+      "step": 1517056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.515286337522595e-05,
+      "loss": 3.9146,
+      "step": 1517568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.514447742771543e-05,
+      "loss": 3.9108,
+      "step": 1518080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5136091480204903e-05,
+      "loss": 3.9189,
+      "step": 1518592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5127705532694386e-05,
+      "loss": 3.9181,
+      "step": 1519104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5119319585183866e-05,
+      "loss": 3.9185,
+      "step": 1519616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5110933637673346e-05,
+      "loss": 3.9157,
+      "step": 1520128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5102547690162826e-05,
+      "loss": 3.9157,
+      "step": 1520640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5094161742652306e-05,
+      "loss": 3.9172,
+      "step": 1521152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5085792173945515e-05,
+      "loss": 3.9239,
+      "step": 1521664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5077406226434995e-05,
+      "loss": 3.9091,
+      "step": 1522176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5069020278924478e-05,
+      "loss": 3.9119,
+      "step": 1522688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5060634331413958e-05,
+      "loss": 3.9116,
+      "step": 1523200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5052281141510903e-05,
+      "loss": 3.902,
+      "step": 1523712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5043895194000376e-05,
+      "loss": 3.9216,
+      "step": 1524224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5035509246489856e-05,
+      "loss": 3.9141,
+      "step": 1524736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.502712329897934e-05,
+      "loss": 3.9173,
+      "step": 1525248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.501873735146882e-05,
+      "loss": 3.9147,
+      "step": 1525760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.50103514039583e-05,
+      "loss": 3.9121,
+      "step": 1526272
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.001376152038574,
+      "eval_runtime": 292.7259,
+      "eval_samples_per_second": 1303.578,
+      "eval_steps_per_second": 40.738,
+      "step": 1526400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.500196545644778e-05,
+      "loss": 3.9164,
+      "step": 1526784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.499357950893726e-05,
+      "loss": 3.9046,
+      "step": 1527296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.498519356142674e-05,
+      "loss": 3.9244,
+      "step": 1527808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.497680761391622e-05,
+      "loss": 3.9103,
+      "step": 1528320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.49684216664057e-05,
+      "loss": 3.9294,
+      "step": 1528832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.496003571889518e-05,
+      "loss": 3.9126,
+      "step": 1529344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.495164977138466e-05,
+      "loss": 3.9095,
+      "step": 1529856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.494326382387414e-05,
+      "loss": 3.9017,
+      "step": 1530368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.493487787636362e-05,
+      "loss": 3.9136,
+      "step": 1530880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.49264919288531e-05,
+      "loss": 3.9194,
+      "step": 1531392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.491810598134258e-05,
+      "loss": 3.9191,
+      "step": 1531904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4909720033832056e-05,
+      "loss": 3.9252,
+      "step": 1532416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.490135046512527e-05,
+      "loss": 3.9105,
+      "step": 1532928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.489296451761475e-05,
+      "loss": 3.9114,
+      "step": 1533440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.488457857010423e-05,
+      "loss": 3.9072,
+      "step": 1533952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4876192622593708e-05,
+      "loss": 3.9052,
+      "step": 1534464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4867806675083188e-05,
+      "loss": 3.9139,
+      "step": 1534976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4859420727572668e-05,
+      "loss": 3.91,
+      "step": 1535488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4851034780062148e-05,
+      "loss": 3.9084,
+      "step": 1536000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4842648832551628e-05,
+      "loss": 3.923,
+      "step": 1536512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.483426288504111e-05,
+      "loss": 3.9173,
+      "step": 1537024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.482587693753059e-05,
+      "loss": 3.916,
+      "step": 1537536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4817490990020068e-05,
+      "loss": 3.9154,
+      "step": 1538048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4809105042509548e-05,
+      "loss": 3.9206,
+      "step": 1538560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4800719094999028e-05,
+      "loss": 3.9053,
+      "step": 1539072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.479234952629224e-05,
+      "loss": 3.9183,
+      "step": 1539584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4783979957585452e-05,
+      "loss": 3.9052,
+      "step": 1540096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4775594010074932e-05,
+      "loss": 3.9093,
+      "step": 1540608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4767208062564412e-05,
+      "loss": 3.9065,
+      "step": 1541120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.475882211505389e-05,
+      "loss": 3.9117,
+      "step": 1541632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.47504525463471e-05,
+      "loss": 3.9145,
+      "step": 1542144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.474206659883658e-05,
+      "loss": 3.9155,
+      "step": 1542656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4733680651326065e-05,
+      "loss": 3.9176,
+      "step": 1543168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.472529470381554e-05,
+      "loss": 3.9127,
+      "step": 1543680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4716925135108754e-05,
+      "loss": 3.9082,
+      "step": 1544192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4708539187598234e-05,
+      "loss": 3.9111,
+      "step": 1544704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.470015324008771e-05,
+      "loss": 3.9054,
+      "step": 1545216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4691767292577194e-05,
+      "loss": 3.8994,
+      "step": 1545728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4683381345066674e-05,
+      "loss": 3.9012,
+      "step": 1546240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4674995397556154e-05,
+      "loss": 3.9002,
+      "step": 1546752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4666609450045634e-05,
+      "loss": 3.9126,
+      "step": 1547264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4658223502535113e-05,
+      "loss": 3.9162,
+      "step": 1547776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4649837555024593e-05,
+      "loss": 3.9106,
+      "step": 1548288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.464145160751407e-05,
+      "loss": 3.9066,
+      "step": 1548800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4633082038807282e-05,
+      "loss": 3.9224,
+      "step": 1549312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4624696091296766e-05,
+      "loss": 3.9098,
+      "step": 1549824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4616326522589975e-05,
+      "loss": 3.9094,
+      "step": 1550336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4607940575079455e-05,
+      "loss": 3.9041,
+      "step": 1550848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4599554627568935e-05,
+      "loss": 3.8939,
+      "step": 1551360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4591168680058415e-05,
+      "loss": 3.9212,
+      "step": 1551872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4582782732547895e-05,
+      "loss": 3.9085,
+      "step": 1552384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4574396785037375e-05,
+      "loss": 3.9104,
+      "step": 1552896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4566010837526855e-05,
+      "loss": 3.9073,
+      "step": 1553408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4557624890016335e-05,
+      "loss": 3.9069,
+      "step": 1553920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4549238942505815e-05,
+      "loss": 3.8969,
+      "step": 1554432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4540852994995295e-05,
+      "loss": 3.9089,
+      "step": 1554944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4532467047484774e-05,
+      "loss": 3.9059,
+      "step": 1555456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.452408109997425e-05,
+      "loss": 3.9181,
+      "step": 1555968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4515711531267467e-05,
+      "loss": 3.9086,
+      "step": 1556480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4507325583756947e-05,
+      "loss": 3.9036,
+      "step": 1556992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4498939636246423e-05,
+      "loss": 3.8974,
+      "step": 1557504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4490553688735903e-05,
+      "loss": 3.9167,
+      "step": 1558016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4482184120029116e-05,
+      "loss": 3.8945,
+      "step": 1558528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.447381455132233e-05,
+      "loss": 3.8928,
+      "step": 1559040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.446542860381181e-05,
+      "loss": 3.9104,
+      "step": 1559552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4457042656301288e-05,
+      "loss": 3.9132,
+      "step": 1560064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4448656708790768e-05,
+      "loss": 3.8969,
+      "step": 1560576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4440287140083977e-05,
+      "loss": 3.8977,
+      "step": 1561088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.443191757137719e-05,
+      "loss": 3.8929,
+      "step": 1561600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.442353162386667e-05,
+      "loss": 3.9119,
+      "step": 1562112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.441514567635615e-05,
+      "loss": 3.9108,
+      "step": 1562624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.440675972884563e-05,
+      "loss": 3.9062,
+      "step": 1563136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.439837378133511e-05,
+      "loss": 3.9102,
+      "step": 1563648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.438998783382459e-05,
+      "loss": 3.9101,
+      "step": 1564160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.438160188631407e-05,
+      "loss": 3.9187,
+      "step": 1564672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.437321593880355e-05,
+      "loss": 3.8992,
+      "step": 1565184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.436482999129303e-05,
+      "loss": 3.9076,
+      "step": 1565696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.435644404378251e-05,
+      "loss": 3.9091,
+      "step": 1566208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.434805809627199e-05,
+      "loss": 3.8967,
+      "step": 1566720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.433967214876147e-05,
+      "loss": 3.9202,
+      "step": 1567232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.433130258005468e-05,
+      "loss": 3.9062,
+      "step": 1567744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.432291663254416e-05,
+      "loss": 3.9135,
+      "step": 1568256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4314530685033642e-05,
+      "loss": 3.9197,
+      "step": 1568768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.430614473752312e-05,
+      "loss": 3.8992,
+      "step": 1569280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.429777516881633e-05,
+      "loss": 3.9054,
+      "step": 1569792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.428938922130581e-05,
+      "loss": 3.9028,
+      "step": 1570304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.428100327379529e-05,
+      "loss": 3.9069,
+      "step": 1570816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.427261732628477e-05,
+      "loss": 3.9101,
+      "step": 1571328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4264247757577983e-05,
+      "loss": 3.9063,
+      "step": 1571840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4255861810067463e-05,
+      "loss": 3.9112,
+      "step": 1572352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4247475862556943e-05,
+      "loss": 3.9141,
+      "step": 1572864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4239089915046423e-05,
+      "loss": 3.9033,
+      "step": 1573376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4230703967535903e-05,
+      "loss": 3.9082,
+      "step": 1573888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4222334398829112e-05,
+      "loss": 3.9089,
+      "step": 1574400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4213948451318595e-05,
+      "loss": 3.9158,
+      "step": 1574912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4205562503808075e-05,
+      "loss": 3.911,
+      "step": 1575424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4197176556297552e-05,
+      "loss": 3.9048,
+      "step": 1575936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4188790608787032e-05,
+      "loss": 3.9144,
+      "step": 1576448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4180404661276512e-05,
+      "loss": 3.908,
+      "step": 1576960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4172018713765992e-05,
+      "loss": 3.9253,
+      "step": 1577472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4163632766255472e-05,
+      "loss": 3.9081,
+      "step": 1577984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4155246818744952e-05,
+      "loss": 3.9059,
+      "step": 1578496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4146877250038164e-05,
+      "loss": 3.9039,
+      "step": 1579008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4138507681331377e-05,
+      "loss": 3.9125,
+      "step": 1579520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4130138112624586e-05,
+      "loss": 3.8899,
+      "step": 1580032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4121752165114066e-05,
+      "loss": 3.8999,
+      "step": 1580544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.411336621760355e-05,
+      "loss": 3.9094,
+      "step": 1581056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4104980270093026e-05,
+      "loss": 3.9105,
+      "step": 1581568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4096594322582506e-05,
+      "loss": 3.9011,
+      "step": 1582080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4088208375071986e-05,
+      "loss": 3.8968,
+      "step": 1582592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4079822427561466e-05,
+      "loss": 3.9026,
+      "step": 1583104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4071436480050945e-05,
+      "loss": 3.9177,
+      "step": 1583616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4063066911344158e-05,
+      "loss": 3.9046,
+      "step": 1584128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4054680963833638e-05,
+      "loss": 3.9128,
+      "step": 1584640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4046295016323118e-05,
+      "loss": 3.9099,
+      "step": 1585152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4037909068812598e-05,
+      "loss": 3.9192,
+      "step": 1585664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4029523121302078e-05,
+      "loss": 3.9111,
+      "step": 1586176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4021153552595287e-05,
+      "loss": 3.8988,
+      "step": 1586688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4012767605084767e-05,
+      "loss": 3.9068,
+      "step": 1587200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.400438165757425e-05,
+      "loss": 3.9137,
+      "step": 1587712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.399601208886746e-05,
+      "loss": 3.8971,
+      "step": 1588224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.398762614135694e-05,
+      "loss": 3.898,
+      "step": 1588736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.397924019384642e-05,
+      "loss": 3.9024,
+      "step": 1589248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.39708542463359e-05,
+      "loss": 3.9059,
+      "step": 1589760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.396246829882538e-05,
+      "loss": 3.9068,
+      "step": 1590272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.395408235131486e-05,
+      "loss": 3.9169,
+      "step": 1590784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.394569640380434e-05,
+      "loss": 3.8918,
+      "step": 1591296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.393731045629382e-05,
+      "loss": 3.9064,
+      "step": 1591808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3928940887587028e-05,
+      "loss": 3.8948,
+      "step": 1592320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3920554940076508e-05,
+      "loss": 3.9223,
+      "step": 1592832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3912168992565988e-05,
+      "loss": 3.9107,
+      "step": 1593344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3903783045055468e-05,
+      "loss": 3.9144,
+      "step": 1593856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.389539709754495e-05,
+      "loss": 3.8993,
+      "step": 1594368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.388701115003443e-05,
+      "loss": 3.9125,
+      "step": 1594880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3878657960131373e-05,
+      "loss": 3.9157,
+      "step": 1595392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3870272012620853e-05,
+      "loss": 3.9081,
+      "step": 1595904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.386188606511033e-05,
+      "loss": 3.9066,
+      "step": 1596416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3853500117599813e-05,
+      "loss": 3.9141,
+      "step": 1596928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3845114170089293e-05,
+      "loss": 3.9066,
+      "step": 1597440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3836728222578773e-05,
+      "loss": 3.9182,
+      "step": 1597952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3828342275068253e-05,
+      "loss": 3.898,
+      "step": 1598464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3819956327557733e-05,
+      "loss": 3.9066,
+      "step": 1598976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.381157038004721e-05,
+      "loss": 3.9034,
+      "step": 1599488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.380318443253669e-05,
+      "loss": 3.8973,
+      "step": 1600000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3794814863829905e-05,
+      "loss": 3.9105,
+      "step": 1600512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3786428916319385e-05,
+      "loss": 3.908,
+      "step": 1601024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.377804296880886e-05,
+      "loss": 3.9093,
+      "step": 1601536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.376965702129834e-05,
+      "loss": 3.9064,
+      "step": 1602048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.376127107378782e-05,
+      "loss": 3.9048,
+      "step": 1602560
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 3.997973680496216,
+      "eval_runtime": 291.5436,
+      "eval_samples_per_second": 1308.864,
+      "eval_steps_per_second": 40.903,
+      "step": 1602720
+    }
+  ],
+  "logging_steps": 512,
+  "max_steps": 3052726,
+  "num_train_epochs": 9223372036854775807,
+  "save_steps": 10,
+  "total_flos": 6.586476828918013e+17,
+  "trial_name": null,
+  "trial_params": null
+}