diff --git a/README.md b/README.md index 08371015f02382e6fcba318f4aaea54ae52cd3c4..5f4f5625039e17bc7326cc10fbbc7896bb06453b 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,6 @@ library_name: peft ## Training procedure -The following `bitsandbytes` quantization config was used during training: -- quant_method: bitsandbytes -- load_in_8bit: True -- load_in_4bit: False -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: fp4 -- bnb_4bit_use_double_quant: False -- bnb_4bit_compute_dtype: float32 - The following `bitsandbytes` quantization config was used during training: - quant_method: bitsandbytes - load_in_8bit: True @@ -29,6 +17,5 @@ The following `bitsandbytes` quantization config was used during training: - bnb_4bit_compute_dtype: float32 ### Framework versions -- PEFT 0.6.0.dev0 - PEFT 0.6.0.dev0 diff --git a/adapter_model.bin b/adapter_model.bin index e9b913951f0eafb85b209a393e5f742ac4bf6408..56402b256e1d53f7ec8d1862916f2f4f8c38d7e9 100644 --- a/adapter_model.bin +++ b/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed6e90196f5a274b5d5ff0f18e648b6396fc99189dc82111fcfd2e83656a72f7 +oid sha256:cd9c3d8987621f868abe4c7d938c69dc21744df4cfc4475a0b74a7024edbc457 size 39409357 diff --git a/checkpoint-1000/adapter_model.bin b/checkpoint-1000/adapter_model.bin index 34fbbd2c7ce064739b832ea4f04685aa6d41faa0..787af5d85433e12ed384f46355f1f145e3bacdb7 100644 --- a/checkpoint-1000/adapter_model.bin +++ b/checkpoint-1000/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f453889b7b5f1f21797413708a1c82782ca563c8173a223b50fb5e004837c160 +oid sha256:14cc62274569aa6e55637b983042d5dd9e0c0998e567c185e59fc1a7793b8762 size 39409357 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt index efd8cd5608cddb3c4edda6e0d5da2052af77a93d..9327c9466f4af4e3b4008f0022cd8f47e7ba2259 100644 --- a/checkpoint-1000/optimizer.pt +++ b/checkpoint-1000/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:980ce4082f86e8643d829ecc076ea00335d9e2d80d18c421ae802d241b9e3197 +oid sha256:906c3906783a3003c8ab79253037e8ca3d96e109bb687416c0d13ac8d187d364 size 78844421 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth index faca5241da3506eadd57588fb689a90f390f5558..c4f6e431605a7da3e2efbdf0dc1fae0f500de983 100644 --- a/checkpoint-1000/rng_state.pth +++ b/checkpoint-1000/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5432f961e7c82cbe7b6ec30d027cf3db4d17372f65774be8cf037a208cbc1ac +oid sha256:370c3a07f37a8aae6ea141b54ca992b21699546baf7407eb587b6056f787333b size 14575 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt index df7757b8324bccfaf0b6d6a0d952f9ebd94027b5..156adb97a2dcba9f834d2be826c444ceb19ad242 100644 --- a/checkpoint-1000/scheduler.pt +++ b/checkpoint-1000/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:203097d159784262bd5c95fedbda5673a26bd26ba8483bc4d9972c3ce0a53781 +oid sha256:bf42ce3ee82b0e0801aee2cff99b338860ec65fe61d91dcf5be231df5b9fa888 size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json index e75c2b4bd63a8b9fb8aec5addf777c7352df2c99..d47d4341a5255e60a8632070e4f982fb0d9f49d9 100644 --- a/checkpoint-1000/trainer_state.json +++ b/checkpoint-1000/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 20.23395510591211, + "epoch": 11.297440423654017, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, @@ -9,6011 +9,6011 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, - { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + { + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 }, { - "epoch": 16.21, - "learning_rate": 9.783163265306124e-05, - "loss": 1.19, + "epoch": 9.05, + "learning_rate": 8.62215909090909e-05, + "loss": 0.5247, "step": 801 }, { - "epoch": 16.23, - "learning_rate": 9.770408163265307e-05, - "loss": 1.1258, + "epoch": 9.06, + "learning_rate": 8.607954545454546e-05, + "loss": 0.5403, "step": 802 }, { - "epoch": 16.25, - "learning_rate": 9.75765306122449e-05, - "loss": 1.1765, + "epoch": 9.07, + "learning_rate": 8.593750000000001e-05, + "loss": 0.5252, "step": 803 }, { - "epoch": 16.27, - "learning_rate": 9.744897959183674e-05, - "loss": 1.1217, + "epoch": 9.08, + "learning_rate": 8.579545454545454e-05, + "loss": 0.5296, "step": 804 }, { - "epoch": 16.29, - "learning_rate": 9.732142857142858e-05, - "loss": 1.1293, + "epoch": 9.09, + "learning_rate": 8.56534090909091e-05, + "loss": 0.5223, "step": 805 }, { - "epoch": 16.31, - "learning_rate": 9.719387755102042e-05, - "loss": 1.17, + "epoch": 9.11, + "learning_rate": 8.551136363636364e-05, + "loss": 0.4972, "step": 806 }, { - "epoch": 16.33, - "learning_rate": 9.706632653061225e-05, - "loss": 1.17, + "epoch": 9.12, + "learning_rate": 8.536931818181818e-05, + "loss": 0.5005, "step": 807 }, { - "epoch": 16.35, - "learning_rate": 9.693877551020408e-05, - "loss": 1.2004, + "epoch": 9.13, + "learning_rate": 8.522727272727273e-05, + "loss": 0.5249, "step": 808 }, { - "epoch": 16.37, - "learning_rate": 9.681122448979593e-05, - "loss": 1.1648, + "epoch": 9.14, + "learning_rate": 8.508522727272728e-05, + "loss": 0.5135, "step": 809 }, { - "epoch": 16.39, - "learning_rate": 9.668367346938776e-05, - "loss": 1.0688, + "epoch": 9.15, + "learning_rate": 8.494318181818182e-05, + "loss": 0.5053, "step": 810 }, { - "epoch": 16.41, - "learning_rate": 9.655612244897959e-05, - "loss": 1.1607, + "epoch": 9.16, + "learning_rate": 8.480113636363637e-05, + "loss": 0.5158, "step": 811 }, { - "epoch": 16.43, - "learning_rate": 9.642857142857143e-05, - "loss": 1.1298, + "epoch": 9.17, + "learning_rate": 8.465909090909091e-05, + "loss": 0.5061, "step": 812 }, { - "epoch": 16.45, - "learning_rate": 9.630102040816326e-05, - "loss": 1.1064, + "epoch": 9.18, + "learning_rate": 8.451704545454546e-05, + "loss": 0.4988, "step": 813 }, { - "epoch": 16.47, - "learning_rate": 9.617346938775511e-05, - "loss": 1.1472, + "epoch": 9.2, + "learning_rate": 8.4375e-05, + "loss": 0.5273, "step": 814 }, { - "epoch": 16.49, - "learning_rate": 9.604591836734694e-05, - "loss": 1.1577, + "epoch": 9.21, + "learning_rate": 8.423295454545455e-05, + "loss": 0.5332, "step": 815 }, { - "epoch": 16.51, - "learning_rate": 9.591836734693878e-05, - "loss": 1.1436, + "epoch": 9.22, + "learning_rate": 8.40909090909091e-05, + "loss": 0.5181, "step": 816 }, { - "epoch": 16.53, - "learning_rate": 9.579081632653061e-05, - "loss": 1.1657, + "epoch": 9.23, + "learning_rate": 8.394886363636364e-05, + "loss": 0.5085, "step": 817 }, { - "epoch": 16.55, - "learning_rate": 9.566326530612246e-05, - "loss": 1.1147, + "epoch": 9.24, + "learning_rate": 8.380681818181818e-05, + "loss": 0.5137, "step": 818 }, { - "epoch": 16.57, - "learning_rate": 9.553571428571429e-05, - "loss": 1.1839, + "epoch": 9.25, + "learning_rate": 8.366477272727273e-05, + "loss": 0.5195, "step": 819 }, { - "epoch": 16.59, - "learning_rate": 9.540816326530613e-05, - "loss": 1.1298, + "epoch": 9.26, + "learning_rate": 8.352272727272727e-05, + "loss": 0.5077, "step": 820 }, { - "epoch": 16.61, - "learning_rate": 9.528061224489796e-05, - "loss": 1.2141, + "epoch": 9.28, + "learning_rate": 8.338068181818183e-05, + "loss": 0.5074, "step": 821 }, { - "epoch": 16.63, - "learning_rate": 9.515306122448981e-05, - "loss": 1.2045, + "epoch": 9.29, + "learning_rate": 8.323863636363637e-05, + "loss": 0.5142, "step": 822 }, { - "epoch": 16.65, - "learning_rate": 9.502551020408164e-05, - "loss": 1.1791, + "epoch": 9.3, + "learning_rate": 8.309659090909091e-05, + "loss": 0.5116, "step": 823 }, { - "epoch": 16.67, - "learning_rate": 9.489795918367348e-05, - "loss": 1.1137, + "epoch": 9.31, + "learning_rate": 8.295454545454547e-05, + "loss": 0.4974, "step": 824 }, { - "epoch": 16.69, - "learning_rate": 9.477040816326531e-05, - "loss": 1.1312, + "epoch": 9.32, + "learning_rate": 8.28125e-05, + "loss": 0.5117, "step": 825 }, { - "epoch": 16.71, - "learning_rate": 9.464285714285715e-05, - "loss": 1.1102, + "epoch": 9.33, + "learning_rate": 8.267045454545455e-05, + "loss": 0.5114, "step": 826 }, { - "epoch": 16.73, - "learning_rate": 9.451530612244899e-05, - "loss": 1.1865, + "epoch": 9.34, + "learning_rate": 8.25284090909091e-05, + "loss": 0.5039, "step": 827 }, { - "epoch": 16.75, - "learning_rate": 9.438775510204082e-05, - "loss": 1.1232, + "epoch": 9.35, + "learning_rate": 8.238636363636364e-05, + "loss": 0.498, "step": 828 }, { - "epoch": 16.77, - "learning_rate": 9.426020408163265e-05, - "loss": 1.2068, + "epoch": 9.37, + "learning_rate": 8.224431818181818e-05, + "loss": 0.5042, "step": 829 }, { - "epoch": 16.79, - "learning_rate": 9.41326530612245e-05, - "loss": 1.1864, + "epoch": 9.38, + "learning_rate": 8.210227272727274e-05, + "loss": 0.5049, "step": 830 }, { - "epoch": 16.81, - "learning_rate": 9.400510204081633e-05, - "loss": 1.2195, + "epoch": 9.39, + "learning_rate": 8.196022727272727e-05, + "loss": 0.5123, "step": 831 }, { - "epoch": 16.83, - "learning_rate": 9.387755102040817e-05, - "loss": 1.2063, + "epoch": 9.4, + "learning_rate": 8.181818181818183e-05, + "loss": 0.4907, "step": 832 }, { - "epoch": 16.85, - "learning_rate": 9.375e-05, - "loss": 1.1455, + "epoch": 9.41, + "learning_rate": 8.167613636363637e-05, + "loss": 0.5267, "step": 833 }, { - "epoch": 16.88, - "learning_rate": 9.362244897959183e-05, - "loss": 1.1819, + "epoch": 9.42, + "learning_rate": 8.15340909090909e-05, + "loss": 0.5314, "step": 834 }, { - "epoch": 16.9, - "learning_rate": 9.349489795918368e-05, - "loss": 1.1887, + "epoch": 9.43, + "learning_rate": 8.139204545454546e-05, + "loss": 0.4952, "step": 835 }, { - "epoch": 16.92, - "learning_rate": 9.336734693877551e-05, - "loss": 1.1557, + "epoch": 9.44, + "learning_rate": 8.125000000000001e-05, + "loss": 0.5014, "step": 836 }, { - "epoch": 16.94, - "learning_rate": 9.323979591836735e-05, - "loss": 1.2094, + "epoch": 9.46, + "learning_rate": 8.110795454545454e-05, + "loss": 0.4967, "step": 837 }, { - "epoch": 16.96, - "learning_rate": 9.311224489795918e-05, - "loss": 1.1512, + "epoch": 9.47, + "learning_rate": 8.09659090909091e-05, + "loss": 0.5116, "step": 838 }, { - "epoch": 16.98, - "learning_rate": 9.298469387755103e-05, - "loss": 1.1463, + "epoch": 9.48, + "learning_rate": 8.082386363636365e-05, + "loss": 0.5119, "step": 839 }, { - "epoch": 17.0, - "learning_rate": 9.285714285714286e-05, - "loss": 1.155, + "epoch": 9.49, + "learning_rate": 8.068181818181818e-05, + "loss": 0.4987, "step": 840 }, { - "epoch": 17.02, - "learning_rate": 9.27295918367347e-05, - "loss": 1.1292, + "epoch": 9.5, + "learning_rate": 8.053977272727274e-05, + "loss": 0.5063, "step": 841 }, { - "epoch": 17.04, - "learning_rate": 9.260204081632653e-05, - "loss": 1.0996, + "epoch": 9.51, + "learning_rate": 8.039772727272728e-05, + "loss": 0.5019, "step": 842 }, { - "epoch": 17.06, - "learning_rate": 9.247448979591838e-05, - "loss": 1.0662, + "epoch": 9.52, + "learning_rate": 8.025568181818183e-05, + "loss": 0.5272, "step": 843 }, { - "epoch": 17.08, - "learning_rate": 9.234693877551021e-05, - "loss": 1.0931, + "epoch": 9.54, + "learning_rate": 8.011363636363637e-05, + "loss": 0.4969, "step": 844 }, { - "epoch": 17.1, - "learning_rate": 9.221938775510205e-05, - "loss": 1.0727, + "epoch": 9.55, + "learning_rate": 7.997159090909092e-05, + "loss": 0.5222, "step": 845 }, { - "epoch": 17.12, - "learning_rate": 9.209183673469388e-05, - "loss": 1.1043, + "epoch": 9.56, + "learning_rate": 7.982954545454546e-05, + "loss": 0.4729, "step": 846 }, { - "epoch": 17.14, - "learning_rate": 9.196428571428572e-05, - "loss": 1.0594, + "epoch": 9.57, + "learning_rate": 7.96875e-05, + "loss": 0.4976, "step": 847 }, { - "epoch": 17.16, - "learning_rate": 9.183673469387756e-05, - "loss": 1.0952, + "epoch": 9.58, + "learning_rate": 7.954545454545455e-05, + "loss": 0.4974, "step": 848 }, { - "epoch": 17.18, - "learning_rate": 9.170918367346939e-05, - "loss": 1.0639, + "epoch": 9.59, + "learning_rate": 7.94034090909091e-05, + "loss": 0.4849, "step": 849 }, { - "epoch": 17.2, - "learning_rate": 9.158163265306124e-05, - "loss": 1.132, + "epoch": 9.6, + "learning_rate": 7.926136363636364e-05, + "loss": 0.4897, "step": 850 }, { - "epoch": 17.22, - "learning_rate": 9.145408163265307e-05, - "loss": 1.1083, + "epoch": 9.61, + "learning_rate": 7.911931818181819e-05, + "loss": 0.4962, "step": 851 }, { - "epoch": 17.24, - "learning_rate": 9.13265306122449e-05, - "loss": 1.1282, + "epoch": 9.63, + "learning_rate": 7.897727272727273e-05, + "loss": 0.4877, "step": 852 }, { - "epoch": 17.26, - "learning_rate": 9.119897959183674e-05, - "loss": 1.0474, + "epoch": 9.64, + "learning_rate": 7.883522727272728e-05, + "loss": 0.4921, "step": 853 }, { - "epoch": 17.28, - "learning_rate": 9.107142857142857e-05, - "loss": 1.1138, + "epoch": 9.65, + "learning_rate": 7.869318181818182e-05, + "loss": 0.4969, "step": 854 }, { - "epoch": 17.3, - "learning_rate": 9.094387755102042e-05, - "loss": 1.1025, + "epoch": 9.66, + "learning_rate": 7.855113636363637e-05, + "loss": 0.5045, "step": 855 }, { - "epoch": 17.32, - "learning_rate": 9.081632653061225e-05, - "loss": 1.0968, + "epoch": 9.67, + "learning_rate": 7.840909090909091e-05, + "loss": 0.5207, "step": 856 }, { - "epoch": 17.34, - "learning_rate": 9.068877551020408e-05, - "loss": 1.1683, + "epoch": 9.68, + "learning_rate": 7.826704545454546e-05, + "loss": 0.5098, "step": 857 }, { - "epoch": 17.36, - "learning_rate": 9.056122448979592e-05, - "loss": 1.0975, + "epoch": 9.69, + "learning_rate": 7.8125e-05, + "loss": 0.5005, "step": 858 }, { - "epoch": 17.38, - "learning_rate": 9.043367346938775e-05, - "loss": 1.1274, + "epoch": 9.7, + "learning_rate": 7.798295454545455e-05, + "loss": 0.5028, "step": 859 }, { - "epoch": 17.4, - "learning_rate": 9.030612244897958e-05, - "loss": 1.0916, + "epoch": 9.72, + "learning_rate": 7.784090909090909e-05, + "loss": 0.5067, "step": 860 }, { - "epoch": 17.42, - "learning_rate": 9.017857142857143e-05, - "loss": 1.0912, + "epoch": 9.73, + "learning_rate": 7.769886363636364e-05, + "loss": 0.484, "step": 861 }, { - "epoch": 17.44, - "learning_rate": 9.005102040816327e-05, - "loss": 1.0875, + "epoch": 9.74, + "learning_rate": 7.755681818181818e-05, + "loss": 0.5029, "step": 862 }, { - "epoch": 17.46, - "learning_rate": 8.99234693877551e-05, - "loss": 1.05, + "epoch": 9.75, + "learning_rate": 7.741477272727273e-05, + "loss": 0.5077, "step": 863 }, { - "epoch": 17.48, - "learning_rate": 8.979591836734695e-05, - "loss": 1.1418, + "epoch": 9.76, + "learning_rate": 7.727272727272727e-05, + "loss": 0.5091, "step": 864 }, { - "epoch": 17.5, - "learning_rate": 8.966836734693878e-05, - "loss": 1.0609, + "epoch": 9.77, + "learning_rate": 7.713068181818183e-05, + "loss": 0.4781, "step": 865 }, { - "epoch": 17.52, - "learning_rate": 8.954081632653062e-05, - "loss": 1.1611, + "epoch": 9.78, + "learning_rate": 7.698863636363636e-05, + "loss": 0.5124, "step": 866 }, { - "epoch": 17.54, - "learning_rate": 8.941326530612245e-05, - "loss": 1.1065, + "epoch": 9.79, + "learning_rate": 7.684659090909091e-05, + "loss": 0.4859, "step": 867 }, { - "epoch": 17.56, - "learning_rate": 8.92857142857143e-05, - "loss": 1.1611, + "epoch": 9.81, + "learning_rate": 7.670454545454547e-05, + "loss": 0.4872, "step": 868 }, { - "epoch": 17.58, - "learning_rate": 8.915816326530613e-05, - "loss": 1.1398, + "epoch": 9.82, + "learning_rate": 7.65625e-05, + "loss": 0.4675, "step": 869 }, { - "epoch": 17.6, - "learning_rate": 8.903061224489796e-05, - "loss": 1.1055, + "epoch": 9.83, + "learning_rate": 7.642045454545454e-05, + "loss": 0.5056, "step": 870 }, { - "epoch": 17.62, - "learning_rate": 8.89030612244898e-05, - "loss": 1.1314, + "epoch": 9.84, + "learning_rate": 7.62784090909091e-05, + "loss": 0.4868, "step": 871 }, { - "epoch": 17.64, - "learning_rate": 8.877551020408164e-05, - "loss": 1.1084, + "epoch": 9.85, + "learning_rate": 7.613636363636363e-05, + "loss": 0.4907, "step": 872 }, { - "epoch": 17.66, - "learning_rate": 8.864795918367348e-05, - "loss": 1.1254, + "epoch": 9.86, + "learning_rate": 7.599431818181818e-05, + "loss": 0.474, "step": 873 }, { - "epoch": 17.68, - "learning_rate": 8.852040816326531e-05, - "loss": 1.142, + "epoch": 9.87, + "learning_rate": 7.585227272727274e-05, + "loss": 0.4813, "step": 874 }, { - "epoch": 17.7, - "learning_rate": 8.839285714285714e-05, - "loss": 1.1371, + "epoch": 9.89, + "learning_rate": 7.571022727272727e-05, + "loss": 0.4838, "step": 875 }, { - "epoch": 17.72, - "learning_rate": 8.826530612244899e-05, - "loss": 1.1092, + "epoch": 9.9, + "learning_rate": 7.556818181818183e-05, + "loss": 0.4935, "step": 876 }, { - "epoch": 17.75, - "learning_rate": 8.813775510204082e-05, - "loss": 1.161, + "epoch": 9.91, + "learning_rate": 7.542613636363637e-05, + "loss": 0.4884, "step": 877 }, { - "epoch": 17.77, - "learning_rate": 8.801020408163265e-05, - "loss": 1.1044, + "epoch": 9.92, + "learning_rate": 7.52840909090909e-05, + "loss": 0.4797, "step": 878 }, { - "epoch": 17.79, - "learning_rate": 8.788265306122449e-05, - "loss": 1.117, + "epoch": 9.93, + "learning_rate": 7.514204545454546e-05, + "loss": 0.479, "step": 879 }, { - "epoch": 17.81, - "learning_rate": 8.775510204081632e-05, - "loss": 1.1262, + "epoch": 9.94, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4727, "step": 880 }, { - "epoch": 17.83, - "learning_rate": 8.762755102040817e-05, - "loss": 1.0829, + "epoch": 9.95, + "learning_rate": 7.485795454545454e-05, + "loss": 0.4758, "step": 881 }, { - "epoch": 17.85, - "learning_rate": 8.75e-05, - "loss": 1.1393, + "epoch": 9.96, + "learning_rate": 7.47159090909091e-05, + "loss": 0.482, "step": 882 }, { - "epoch": 17.87, - "learning_rate": 8.737244897959183e-05, - "loss": 1.1781, + "epoch": 9.98, + "learning_rate": 7.457386363636364e-05, + "loss": 0.4951, "step": 883 }, { - "epoch": 17.89, - "learning_rate": 8.724489795918367e-05, - "loss": 1.1582, + "epoch": 9.99, + "learning_rate": 7.443181818181817e-05, + "loss": 0.4823, "step": 884 }, { - "epoch": 17.91, - "learning_rate": 8.711734693877552e-05, - "loss": 1.1469, + "epoch": 10.0, + "learning_rate": 7.428977272727273e-05, + "loss": 0.4638, "step": 885 }, { - "epoch": 17.93, - "learning_rate": 8.698979591836735e-05, - "loss": 1.1494, + "epoch": 10.01, + "learning_rate": 7.414772727272728e-05, + "loss": 0.4715, "step": 886 }, { - "epoch": 17.95, - "learning_rate": 8.68622448979592e-05, - "loss": 1.1251, + "epoch": 10.02, + "learning_rate": 7.400568181818182e-05, + "loss": 0.461, "step": 887 }, { - "epoch": 17.97, - "learning_rate": 8.673469387755102e-05, - "loss": 1.1624, + "epoch": 10.03, + "learning_rate": 7.386363636363637e-05, + "loss": 0.4429, "step": 888 }, { - "epoch": 17.99, - "learning_rate": 8.660714285714287e-05, - "loss": 1.0842, + "epoch": 10.04, + "learning_rate": 7.372159090909091e-05, + "loss": 0.4403, "step": 889 }, { - "epoch": 18.01, - "learning_rate": 8.64795918367347e-05, - "loss": 1.1944, + "epoch": 10.05, + "learning_rate": 7.357954545454546e-05, + "loss": 0.4519, "step": 890 }, { - "epoch": 18.03, - "learning_rate": 8.635204081632653e-05, - "loss": 1.0642, + "epoch": 10.07, + "learning_rate": 7.34375e-05, + "loss": 0.4611, "step": 891 }, { - "epoch": 18.05, - "learning_rate": 8.622448979591838e-05, - "loss": 1.0459, + "epoch": 10.08, + "learning_rate": 7.329545454545455e-05, + "loss": 0.4543, "step": 892 }, { - "epoch": 18.07, - "learning_rate": 8.60969387755102e-05, - "loss": 1.0941, + "epoch": 10.09, + "learning_rate": 7.315340909090909e-05, + "loss": 0.4528, "step": 893 }, { - "epoch": 18.09, - "learning_rate": 8.596938775510205e-05, - "loss": 1.0457, + "epoch": 10.1, + "learning_rate": 7.301136363636364e-05, + "loss": 0.4586, "step": 894 }, { - "epoch": 18.11, - "learning_rate": 8.584183673469388e-05, - "loss": 1.1033, + "epoch": 10.11, + "learning_rate": 7.286931818181818e-05, + "loss": 0.4418, "step": 895 }, { - "epoch": 18.13, - "learning_rate": 8.571428571428571e-05, - "loss": 1.0756, + "epoch": 10.12, + "learning_rate": 7.272727272727273e-05, + "loss": 0.4435, "step": 896 }, { - "epoch": 18.15, - "learning_rate": 8.558673469387756e-05, - "loss": 1.0615, + "epoch": 10.13, + "learning_rate": 7.258522727272727e-05, + "loss": 0.44, "step": 897 }, { - "epoch": 18.17, - "learning_rate": 8.545918367346939e-05, - "loss": 1.0828, + "epoch": 10.15, + "learning_rate": 7.244318181818183e-05, + "loss": 0.4589, "step": 898 }, { - "epoch": 18.19, - "learning_rate": 8.533163265306123e-05, - "loss": 1.1158, + "epoch": 10.16, + "learning_rate": 7.230113636363636e-05, + "loss": 0.4597, "step": 899 }, { - "epoch": 18.21, - "learning_rate": 8.520408163265306e-05, - "loss": 1.0133, + "epoch": 10.17, + "learning_rate": 7.215909090909091e-05, + "loss": 0.4479, "step": 900 }, { - "epoch": 18.23, - "learning_rate": 8.50765306122449e-05, - "loss": 1.0437, + "epoch": 10.18, + "learning_rate": 7.201704545454547e-05, + "loss": 0.4477, "step": 901 }, { - "epoch": 18.25, - "learning_rate": 8.494897959183674e-05, - "loss": 1.0372, + "epoch": 10.19, + "learning_rate": 7.1875e-05, + "loss": 0.446, "step": 902 }, { - "epoch": 18.27, - "learning_rate": 8.482142857142857e-05, - "loss": 1.1012, + "epoch": 10.2, + "learning_rate": 7.173295454545454e-05, + "loss": 0.4546, "step": 903 }, { - "epoch": 18.29, - "learning_rate": 8.469387755102041e-05, - "loss": 1.0777, + "epoch": 10.21, + "learning_rate": 7.15909090909091e-05, + "loss": 0.4347, "step": 904 }, { - "epoch": 18.31, - "learning_rate": 8.456632653061224e-05, - "loss": 1.0799, + "epoch": 10.22, + "learning_rate": 7.144886363636363e-05, + "loss": 0.452, "step": 905 }, { - "epoch": 18.33, - "learning_rate": 8.443877551020409e-05, - "loss": 0.9846, + "epoch": 10.24, + "learning_rate": 7.130681818181818e-05, + "loss": 0.4536, "step": 906 }, { - "epoch": 18.35, - "learning_rate": 8.431122448979592e-05, - "loss": 1.1, + "epoch": 10.25, + "learning_rate": 7.116477272727274e-05, + "loss": 0.4492, "step": 907 }, { - "epoch": 18.37, - "learning_rate": 8.418367346938776e-05, - "loss": 1.0787, + "epoch": 10.26, + "learning_rate": 7.102272727272727e-05, + "loss": 0.4401, "step": 908 }, { - "epoch": 18.39, - "learning_rate": 8.40561224489796e-05, - "loss": 1.0647, + "epoch": 10.27, + "learning_rate": 7.088068181818183e-05, + "loss": 0.4609, "step": 909 }, { - "epoch": 18.41, - "learning_rate": 8.392857142857144e-05, - "loss": 1.056, + "epoch": 10.28, + "learning_rate": 7.073863636363637e-05, + "loss": 0.4544, "step": 910 }, { - "epoch": 18.43, - "learning_rate": 8.380102040816327e-05, - "loss": 1.1131, + "epoch": 10.29, + "learning_rate": 7.05965909090909e-05, + "loss": 0.4477, "step": 911 }, { - "epoch": 18.45, - "learning_rate": 8.367346938775511e-05, - "loss": 1.0825, + "epoch": 10.3, + "learning_rate": 7.045454545454546e-05, + "loss": 0.4445, "step": 912 }, { - "epoch": 18.47, - "learning_rate": 8.354591836734695e-05, - "loss": 1.0681, + "epoch": 10.31, + "learning_rate": 7.031250000000001e-05, + "loss": 0.4544, "step": 913 }, { - "epoch": 18.49, - "learning_rate": 8.341836734693878e-05, - "loss": 1.0479, + "epoch": 10.33, + "learning_rate": 7.017045454545454e-05, + "loss": 0.4634, "step": 914 }, { - "epoch": 18.51, - "learning_rate": 8.329081632653062e-05, - "loss": 1.0921, + "epoch": 10.34, + "learning_rate": 7.00284090909091e-05, + "loss": 0.4499, "step": 915 }, { - "epoch": 18.53, - "learning_rate": 8.316326530612245e-05, - "loss": 1.0626, + "epoch": 10.35, + "learning_rate": 6.988636363636364e-05, + "loss": 0.4354, "step": 916 }, { - "epoch": 18.55, - "learning_rate": 8.30357142857143e-05, - "loss": 1.0518, + "epoch": 10.36, + "learning_rate": 6.974431818181818e-05, + "loss": 0.454, "step": 917 }, { - "epoch": 18.57, - "learning_rate": 8.290816326530613e-05, - "loss": 1.0557, + "epoch": 10.37, + "learning_rate": 6.960227272727273e-05, + "loss": 0.4473, "step": 918 }, { - "epoch": 18.6, - "learning_rate": 8.278061224489796e-05, - "loss": 1.0831, + "epoch": 10.38, + "learning_rate": 6.946022727272728e-05, + "loss": 0.4347, "step": 919 }, { - "epoch": 18.62, - "learning_rate": 8.26530612244898e-05, - "loss": 1.0307, + "epoch": 10.39, + "learning_rate": 6.931818181818182e-05, + "loss": 0.441, "step": 920 }, { - "epoch": 18.64, - "learning_rate": 8.252551020408163e-05, - "loss": 1.0455, + "epoch": 10.4, + "learning_rate": 6.917613636363637e-05, + "loss": 0.4545, "step": 921 }, { - "epoch": 18.66, - "learning_rate": 8.239795918367348e-05, - "loss": 1.0667, + "epoch": 10.42, + "learning_rate": 6.903409090909091e-05, + "loss": 0.458, "step": 922 }, { - "epoch": 18.68, - "learning_rate": 8.227040816326531e-05, - "loss": 1.0736, + "epoch": 10.43, + "learning_rate": 6.889204545454546e-05, + "loss": 0.4381, "step": 923 }, { - "epoch": 18.7, - "learning_rate": 8.214285714285714e-05, - "loss": 1.0108, + "epoch": 10.44, + "learning_rate": 6.875e-05, + "loss": 0.441, "step": 924 }, { - "epoch": 18.72, - "learning_rate": 8.201530612244898e-05, - "loss": 1.0458, + "epoch": 10.45, + "learning_rate": 6.860795454545455e-05, + "loss": 0.4446, "step": 925 }, { - "epoch": 18.74, - "learning_rate": 8.188775510204081e-05, - "loss": 1.0852, + "epoch": 10.46, + "learning_rate": 6.84659090909091e-05, + "loss": 0.4548, "step": 926 }, { - "epoch": 18.76, - "learning_rate": 8.176020408163265e-05, - "loss": 1.1207, + "epoch": 10.47, + "learning_rate": 6.832386363636364e-05, + "loss": 0.4404, "step": 927 }, { - "epoch": 18.78, - "learning_rate": 8.163265306122449e-05, - "loss": 1.0914, + "epoch": 10.48, + "learning_rate": 6.818181818181818e-05, + "loss": 0.4446, "step": 928 }, { - "epoch": 18.8, - "learning_rate": 8.150510204081633e-05, - "loss": 1.1108, + "epoch": 10.5, + "learning_rate": 6.803977272727273e-05, + "loss": 0.4434, "step": 929 }, { - "epoch": 18.82, - "learning_rate": 8.137755102040817e-05, - "loss": 1.1394, + "epoch": 10.51, + "learning_rate": 6.789772727272727e-05, + "loss": 0.4778, "step": 930 }, { - "epoch": 18.84, - "learning_rate": 8.125000000000001e-05, - "loss": 1.029, + "epoch": 10.52, + "learning_rate": 6.775568181818182e-05, + "loss": 0.4356, "step": 931 }, { - "epoch": 18.86, - "learning_rate": 8.112244897959184e-05, - "loss": 1.0661, + "epoch": 10.53, + "learning_rate": 6.761363636363636e-05, + "loss": 0.4464, "step": 932 }, { - "epoch": 18.88, - "learning_rate": 8.099489795918369e-05, - "loss": 1.0303, + "epoch": 10.54, + "learning_rate": 6.747159090909091e-05, + "loss": 0.4387, "step": 933 }, { - "epoch": 18.9, - "learning_rate": 8.086734693877552e-05, - "loss": 1.1144, + "epoch": 10.55, + "learning_rate": 6.732954545454547e-05, + "loss": 0.456, "step": 934 }, { - "epoch": 18.92, - "learning_rate": 8.073979591836736e-05, - "loss": 1.1096, + "epoch": 10.56, + "learning_rate": 6.71875e-05, + "loss": 0.453, "step": 935 }, { - "epoch": 18.94, - "learning_rate": 8.061224489795919e-05, - "loss": 1.123, + "epoch": 10.57, + "learning_rate": 6.704545454545455e-05, + "loss": 0.4611, "step": 936 }, { - "epoch": 18.96, - "learning_rate": 8.048469387755102e-05, - "loss": 1.1002, + "epoch": 10.59, + "learning_rate": 6.69034090909091e-05, + "loss": 0.4354, "step": 937 }, { - "epoch": 18.98, - "learning_rate": 8.035714285714287e-05, - "loss": 1.1016, + "epoch": 10.6, + "learning_rate": 6.676136363636364e-05, + "loss": 0.4519, "step": 938 }, { - "epoch": 19.0, - "learning_rate": 8.02295918367347e-05, - "loss": 1.0847, + "epoch": 10.61, + "learning_rate": 6.661931818181818e-05, + "loss": 0.4435, "step": 939 }, { - "epoch": 19.02, - "learning_rate": 8.010204081632653e-05, - "loss": 1.1029, + "epoch": 10.62, + "learning_rate": 6.647727272727274e-05, + "loss": 0.4422, "step": 940 }, { - "epoch": 19.04, - "learning_rate": 7.997448979591837e-05, - "loss": 1.041, + "epoch": 10.63, + "learning_rate": 6.633522727272727e-05, + "loss": 0.4344, "step": 941 }, { - "epoch": 19.06, - "learning_rate": 7.98469387755102e-05, - "loss": 1.01, + "epoch": 10.64, + "learning_rate": 6.619318181818183e-05, + "loss": 0.4419, "step": 942 }, { - "epoch": 19.08, - "learning_rate": 7.971938775510205e-05, - "loss": 1.0197, + "epoch": 10.65, + "learning_rate": 6.605113636363637e-05, + "loss": 0.4308, "step": 943 }, { - "epoch": 19.1, - "learning_rate": 7.959183673469388e-05, - "loss": 1.0543, + "epoch": 10.66, + "learning_rate": 6.59090909090909e-05, + "loss": 0.4043, "step": 944 }, { - "epoch": 19.12, - "learning_rate": 7.946428571428571e-05, - "loss": 1.0369, + "epoch": 10.68, + "learning_rate": 6.576704545454546e-05, + "loss": 0.4626, "step": 945 }, { - "epoch": 19.14, - "learning_rate": 7.933673469387755e-05, - "loss": 1.0154, + "epoch": 10.69, + "learning_rate": 6.562500000000001e-05, + "loss": 0.4365, "step": 946 }, { - "epoch": 19.16, - "learning_rate": 7.920918367346939e-05, - "loss": 0.9546, + "epoch": 10.7, + "learning_rate": 6.548295454545454e-05, + "loss": 0.4397, "step": 947 }, { - "epoch": 19.18, - "learning_rate": 7.908163265306123e-05, - "loss": 0.9982, + "epoch": 10.71, + "learning_rate": 6.53409090909091e-05, + "loss": 0.4463, "step": 948 }, { - "epoch": 19.2, - "learning_rate": 7.895408163265306e-05, - "loss": 1.0748, + "epoch": 10.72, + "learning_rate": 6.519886363636364e-05, + "loss": 0.4394, "step": 949 }, { - "epoch": 19.22, - "learning_rate": 7.882653061224489e-05, - "loss": 1.0562, + "epoch": 10.73, + "learning_rate": 6.505681818181818e-05, + "loss": 0.45, "step": 950 }, { - "epoch": 19.24, - "learning_rate": 7.869897959183674e-05, - "loss": 1.0352, + "epoch": 10.74, + "learning_rate": 6.491477272727273e-05, + "loss": 0.4363, "step": 951 }, { - "epoch": 19.26, - "learning_rate": 7.857142857142858e-05, - "loss": 0.9976, + "epoch": 10.76, + "learning_rate": 6.477272727272728e-05, + "loss": 0.4566, "step": 952 }, { - "epoch": 19.28, - "learning_rate": 7.844387755102041e-05, - "loss": 1.0221, + "epoch": 10.77, + "learning_rate": 6.463068181818183e-05, + "loss": 0.4235, "step": 953 }, { - "epoch": 19.3, - "learning_rate": 7.831632653061226e-05, - "loss": 1.0119, + "epoch": 10.78, + "learning_rate": 6.448863636363637e-05, + "loss": 0.4458, "step": 954 }, { - "epoch": 19.32, - "learning_rate": 7.818877551020409e-05, - "loss": 1.0657, + "epoch": 10.79, + "learning_rate": 6.434659090909092e-05, + "loss": 0.423, "step": 955 }, { - "epoch": 19.34, - "learning_rate": 7.806122448979593e-05, - "loss": 0.9591, + "epoch": 10.8, + "learning_rate": 6.420454545454546e-05, + "loss": 0.445, "step": 956 }, { - "epoch": 19.36, - "learning_rate": 7.793367346938776e-05, - "loss": 1.0101, + "epoch": 10.81, + "learning_rate": 6.40625e-05, + "loss": 0.424, "step": 957 }, { - "epoch": 19.38, - "learning_rate": 7.780612244897959e-05, - "loss": 1.0453, + "epoch": 10.82, + "learning_rate": 6.392045454545455e-05, + "loss": 0.4224, "step": 958 }, { - "epoch": 19.4, - "learning_rate": 7.767857142857144e-05, - "loss": 1.0461, + "epoch": 10.83, + "learning_rate": 6.37784090909091e-05, + "loss": 0.4223, "step": 959 }, { - "epoch": 19.42, - "learning_rate": 7.755102040816327e-05, - "loss": 1.0959, + "epoch": 10.85, + "learning_rate": 6.363636363636364e-05, + "loss": 0.4314, "step": 960 }, { - "epoch": 19.44, - "learning_rate": 7.742346938775511e-05, - "loss": 1.0608, + "epoch": 10.86, + "learning_rate": 6.349431818181819e-05, + "loss": 0.4488, "step": 961 }, { - "epoch": 19.47, - "learning_rate": 7.729591836734694e-05, - "loss": 1.1177, + "epoch": 10.87, + "learning_rate": 6.335227272727273e-05, + "loss": 0.423, "step": 962 }, { - "epoch": 19.49, - "learning_rate": 7.716836734693877e-05, - "loss": 1.0354, + "epoch": 10.88, + "learning_rate": 6.321022727272728e-05, + "loss": 0.4416, "step": 963 }, { - "epoch": 19.51, - "learning_rate": 7.704081632653062e-05, - "loss": 1.0507, + "epoch": 10.89, + "learning_rate": 6.306818181818182e-05, + "loss": 0.423, "step": 964 }, { - "epoch": 19.53, - "learning_rate": 7.691326530612245e-05, - "loss": 1.0313, + "epoch": 10.9, + "learning_rate": 6.292613636363637e-05, + "loss": 0.4502, "step": 965 }, { - "epoch": 19.55, - "learning_rate": 7.67857142857143e-05, - "loss": 1.0569, + "epoch": 10.91, + "learning_rate": 6.278409090909091e-05, + "loss": 0.4266, "step": 966 }, { - "epoch": 19.57, - "learning_rate": 7.665816326530612e-05, - "loss": 1.0862, + "epoch": 10.92, + "learning_rate": 6.264204545454546e-05, + "loss": 0.4344, "step": 967 }, { - "epoch": 19.59, - "learning_rate": 7.653061224489796e-05, - "loss": 1.0593, + "epoch": 10.94, + "learning_rate": 6.25e-05, + "loss": 0.434, "step": 968 }, { - "epoch": 19.61, - "learning_rate": 7.64030612244898e-05, - "loss": 1.0602, + "epoch": 10.95, + "learning_rate": 6.235795454545455e-05, + "loss": 0.4269, "step": 969 }, { - "epoch": 19.63, - "learning_rate": 7.627551020408163e-05, - "loss": 1.0048, + "epoch": 10.96, + "learning_rate": 6.221590909090909e-05, + "loss": 0.4158, "step": 970 }, { - "epoch": 19.65, - "learning_rate": 7.614795918367347e-05, - "loss": 1.0346, + "epoch": 10.97, + "learning_rate": 6.207386363636364e-05, + "loss": 0.4231, "step": 971 }, { - "epoch": 19.67, - "learning_rate": 7.60204081632653e-05, - "loss": 1.0172, + "epoch": 10.98, + "learning_rate": 6.193181818181818e-05, + "loss": 0.4235, "step": 972 }, { - "epoch": 19.69, - "learning_rate": 7.589285714285714e-05, - "loss": 1.02, + "epoch": 10.99, + "learning_rate": 6.178977272727273e-05, + "loss": 0.4504, "step": 973 }, { - "epoch": 19.71, - "learning_rate": 7.576530612244898e-05, - "loss": 1.0028, + "epoch": 11.0, + "learning_rate": 6.164772727272727e-05, + "loss": 0.4394, "step": 974 }, { - "epoch": 19.73, - "learning_rate": 7.563775510204083e-05, - "loss": 1.08, + "epoch": 11.02, + "learning_rate": 6.150568181818183e-05, + "loss": 0.4333, "step": 975 }, { - "epoch": 19.75, - "learning_rate": 7.551020408163266e-05, - "loss": 1.0402, + "epoch": 11.03, + "learning_rate": 6.136363636363636e-05, + "loss": 0.3936, "step": 976 }, { - "epoch": 19.77, - "learning_rate": 7.53826530612245e-05, - "loss": 1.0567, + "epoch": 11.04, + "learning_rate": 6.122159090909091e-05, + "loss": 0.3933, "step": 977 }, { - "epoch": 19.79, - "learning_rate": 7.525510204081633e-05, - "loss": 1.0169, + "epoch": 11.05, + "learning_rate": 6.107954545454547e-05, + "loss": 0.4161, "step": 978 }, { - "epoch": 19.81, - "learning_rate": 7.512755102040818e-05, - "loss": 0.9881, + "epoch": 11.06, + "learning_rate": 6.0937500000000004e-05, + "loss": 0.4097, "step": 979 }, { - "epoch": 19.83, - "learning_rate": 7.500000000000001e-05, - "loss": 1.0677, + "epoch": 11.07, + "learning_rate": 6.079545454545454e-05, + "loss": 0.412, "step": 980 }, { - "epoch": 19.85, - "learning_rate": 7.487244897959184e-05, - "loss": 1.1026, + "epoch": 11.08, + "learning_rate": 6.0653409090909094e-05, + "loss": 0.4104, "step": 981 }, { - "epoch": 19.87, - "learning_rate": 7.474489795918368e-05, - "loss": 1.0101, + "epoch": 11.09, + "learning_rate": 6.051136363636364e-05, + "loss": 0.4152, "step": 982 }, { - "epoch": 19.89, - "learning_rate": 7.461734693877551e-05, - "loss": 1.069, + "epoch": 11.11, + "learning_rate": 6.036931818181818e-05, + "loss": 0.4037, "step": 983 }, { - "epoch": 19.91, - "learning_rate": 7.448979591836736e-05, - "loss": 1.0493, + "epoch": 11.12, + "learning_rate": 6.022727272727273e-05, + "loss": 0.413, "step": 984 }, { - "epoch": 19.93, - "learning_rate": 7.436224489795919e-05, - "loss": 1.0858, + "epoch": 11.13, + "learning_rate": 6.0085227272727274e-05, + "loss": 0.4413, "step": 985 }, { - "epoch": 19.95, - "learning_rate": 7.423469387755102e-05, - "loss": 1.0734, + "epoch": 11.14, + "learning_rate": 5.9943181818181826e-05, + "loss": 0.3908, "step": 986 }, { - "epoch": 19.97, - "learning_rate": 7.410714285714286e-05, - "loss": 1.0203, + "epoch": 11.15, + "learning_rate": 5.9801136363636365e-05, + "loss": 0.3982, "step": 987 }, { - "epoch": 19.99, - "learning_rate": 7.39795918367347e-05, - "loss": 1.0285, + "epoch": 11.16, + "learning_rate": 5.965909090909091e-05, + "loss": 0.4109, "step": 988 }, { - "epoch": 20.01, - "learning_rate": 7.385204081632653e-05, - "loss": 0.9446, + "epoch": 11.17, + "learning_rate": 5.951704545454546e-05, + "loss": 0.3923, "step": 989 }, { - "epoch": 20.03, - "learning_rate": 7.372448979591837e-05, - "loss": 0.9915, + "epoch": 11.18, + "learning_rate": 5.9375e-05, + "loss": 0.4107, "step": 990 }, { - "epoch": 20.05, - "learning_rate": 7.35969387755102e-05, - "loss": 0.9882, + "epoch": 11.2, + "learning_rate": 5.9232954545454545e-05, + "loss": 0.4099, "step": 991 }, { - "epoch": 20.07, - "learning_rate": 7.346938775510205e-05, - "loss": 0.9338, + "epoch": 11.21, + "learning_rate": 5.90909090909091e-05, + "loss": 0.4163, "step": 992 }, { - "epoch": 20.09, - "learning_rate": 7.334183673469388e-05, - "loss": 0.942, + "epoch": 11.22, + "learning_rate": 5.8948863636363635e-05, + "loss": 0.4189, "step": 993 }, { - "epoch": 20.11, - "learning_rate": 7.321428571428571e-05, - "loss": 0.9725, + "epoch": 11.23, + "learning_rate": 5.880681818181818e-05, + "loss": 0.3889, "step": 994 }, { - "epoch": 20.13, - "learning_rate": 7.308673469387755e-05, - "loss": 1.027, + "epoch": 11.24, + "learning_rate": 5.866477272727273e-05, + "loss": 0.3988, "step": 995 }, { - "epoch": 20.15, - "learning_rate": 7.29591836734694e-05, - "loss": 1.0081, + "epoch": 11.25, + "learning_rate": 5.852272727272727e-05, + "loss": 0.4215, "step": 996 }, { - "epoch": 20.17, - "learning_rate": 7.283163265306123e-05, - "loss": 1.0117, + "epoch": 11.26, + "learning_rate": 5.838068181818183e-05, + "loss": 0.4207, "step": 997 }, { - "epoch": 20.19, - "learning_rate": 7.270408163265307e-05, - "loss": 0.969, + "epoch": 11.27, + "learning_rate": 5.823863636363637e-05, + "loss": 0.413, "step": 998 }, { - "epoch": 20.21, - "learning_rate": 7.25765306122449e-05, - "loss": 1.0024, + "epoch": 11.29, + "learning_rate": 5.8096590909090906e-05, + "loss": 0.4057, "step": 999 }, { - "epoch": 20.23, - "learning_rate": 7.244897959183675e-05, - "loss": 0.994, + "epoch": 11.3, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.3939, "step": 1000 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.4801648710853222e+18, + "total_flos": 1.3647437694522778e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-1000/training_args.bin +++ b/checkpoint-1000/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-1100/adapter_model.bin b/checkpoint-1100/adapter_model.bin index 7dcf0dc8d5443bd22121bd973b23076054b3ffc1..b0c372ea5e6bb9ba9144e31528b00cbe54585c4a 100644 --- a/checkpoint-1100/adapter_model.bin +++ b/checkpoint-1100/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:475f2b2ac94b5181337d2afa8e4c9ca58b6bb0a30162d0174dc0d849d31a30b0 +oid sha256:5c4d9203c081d3cd793800a0facc03411d40e2f93788f5b95bdfa968eb1645c1 size 39409357 diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt index b17fb83a70f4d6556bd2414b6993be01bb4b300d..96da47b35aa614dc0d2fd08e71e7090e8060f156 100644 --- a/checkpoint-1100/optimizer.pt +++ b/checkpoint-1100/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2866f6f91b96afb3514ac044018e0555e8cbf2349cf0707fcab04ed3fddf495 +oid sha256:a9dba50b77a684018228b1b562dc170ee8a5e6c31c67ed3f1963e3e85f7adbe1 size 78844421 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth index 6c28f5beeeb3e640e81e05216ffbd6b8723d5723..961069ec281e351c01eb6735b97b75dd3d8d33b7 100644 --- a/checkpoint-1100/rng_state.pth +++ b/checkpoint-1100/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a2ce2e2326ca04fc964861061399d5335411f688384a0f62f8f943f1fff584e +oid sha256:696b4cef517e79a03215d5c5fccde0c73d2d962fe70ebab1e472f650136142b2 size 14575 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt index 93a79f6932abedd9808abc7ecd95a072e0a7d19f..3fb997f5b184a72c73e1ed47a3a47b72638b4cbf 100644 --- a/checkpoint-1100/scheduler.pt +++ b/checkpoint-1100/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4088610c769d87bd694b880357ee2736f8d9456a674ba2deac6ae25ba56fffaa +oid sha256:bf976c23393d274589d6444fcab30c2d19ead89a0a911884f99807904f62c00b size 627 diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json index aaf6a541ca340e29c7cd0e489d61a46e1a63d6b3..4b3079060bc1fe55aee51170296a362fc0f30c31 100644 --- a/checkpoint-1100/trainer_state.json +++ b/checkpoint-1100/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 22.25735061650332, + "epoch": 12.427184466019417, "eval_steps": 500, "global_step": 1100, "is_hyper_param_search": false, @@ -9,6611 +9,6611 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 }, { - "epoch": 16.21, - "learning_rate": 9.783163265306124e-05, - "loss": 1.19, + "epoch": 9.05, + "learning_rate": 8.62215909090909e-05, + "loss": 0.5247, "step": 801 }, { - "epoch": 16.23, - "learning_rate": 9.770408163265307e-05, - "loss": 1.1258, + "epoch": 9.06, + "learning_rate": 8.607954545454546e-05, + "loss": 0.5403, "step": 802 }, { - "epoch": 16.25, - "learning_rate": 9.75765306122449e-05, - "loss": 1.1765, + "epoch": 9.07, + "learning_rate": 8.593750000000001e-05, + "loss": 0.5252, "step": 803 }, { - "epoch": 16.27, - "learning_rate": 9.744897959183674e-05, - "loss": 1.1217, + "epoch": 9.08, + "learning_rate": 8.579545454545454e-05, + "loss": 0.5296, "step": 804 }, { - "epoch": 16.29, - "learning_rate": 9.732142857142858e-05, - "loss": 1.1293, + "epoch": 9.09, + "learning_rate": 8.56534090909091e-05, + "loss": 0.5223, "step": 805 }, { - "epoch": 16.31, - "learning_rate": 9.719387755102042e-05, - "loss": 1.17, + "epoch": 9.11, + "learning_rate": 8.551136363636364e-05, + "loss": 0.4972, "step": 806 }, { - "epoch": 16.33, - "learning_rate": 9.706632653061225e-05, - "loss": 1.17, + "epoch": 9.12, + "learning_rate": 8.536931818181818e-05, + "loss": 0.5005, "step": 807 }, { - "epoch": 16.35, - "learning_rate": 9.693877551020408e-05, - "loss": 1.2004, + "epoch": 9.13, + "learning_rate": 8.522727272727273e-05, + "loss": 0.5249, "step": 808 }, { - "epoch": 16.37, - "learning_rate": 9.681122448979593e-05, - "loss": 1.1648, + "epoch": 9.14, + "learning_rate": 8.508522727272728e-05, + "loss": 0.5135, "step": 809 }, { - "epoch": 16.39, - "learning_rate": 9.668367346938776e-05, - "loss": 1.0688, + "epoch": 9.15, + "learning_rate": 8.494318181818182e-05, + "loss": 0.5053, "step": 810 }, { - "epoch": 16.41, - "learning_rate": 9.655612244897959e-05, - "loss": 1.1607, + "epoch": 9.16, + "learning_rate": 8.480113636363637e-05, + "loss": 0.5158, "step": 811 }, { - "epoch": 16.43, - "learning_rate": 9.642857142857143e-05, - "loss": 1.1298, + "epoch": 9.17, + "learning_rate": 8.465909090909091e-05, + "loss": 0.5061, "step": 812 }, { - "epoch": 16.45, - "learning_rate": 9.630102040816326e-05, - "loss": 1.1064, + "epoch": 9.18, + "learning_rate": 8.451704545454546e-05, + "loss": 0.4988, "step": 813 }, { - "epoch": 16.47, - "learning_rate": 9.617346938775511e-05, - "loss": 1.1472, + "epoch": 9.2, + "learning_rate": 8.4375e-05, + "loss": 0.5273, "step": 814 }, { - "epoch": 16.49, - "learning_rate": 9.604591836734694e-05, - "loss": 1.1577, + "epoch": 9.21, + "learning_rate": 8.423295454545455e-05, + "loss": 0.5332, "step": 815 }, { - "epoch": 16.51, - "learning_rate": 9.591836734693878e-05, - "loss": 1.1436, + "epoch": 9.22, + "learning_rate": 8.40909090909091e-05, + "loss": 0.5181, "step": 816 }, { - "epoch": 16.53, - "learning_rate": 9.579081632653061e-05, - "loss": 1.1657, + "epoch": 9.23, + "learning_rate": 8.394886363636364e-05, + "loss": 0.5085, "step": 817 }, { - "epoch": 16.55, - "learning_rate": 9.566326530612246e-05, - "loss": 1.1147, + "epoch": 9.24, + "learning_rate": 8.380681818181818e-05, + "loss": 0.5137, "step": 818 }, { - "epoch": 16.57, - "learning_rate": 9.553571428571429e-05, - "loss": 1.1839, + "epoch": 9.25, + "learning_rate": 8.366477272727273e-05, + "loss": 0.5195, "step": 819 }, { - "epoch": 16.59, - "learning_rate": 9.540816326530613e-05, - "loss": 1.1298, + "epoch": 9.26, + "learning_rate": 8.352272727272727e-05, + "loss": 0.5077, "step": 820 }, { - "epoch": 16.61, - "learning_rate": 9.528061224489796e-05, - "loss": 1.2141, + "epoch": 9.28, + "learning_rate": 8.338068181818183e-05, + "loss": 0.5074, "step": 821 }, { - "epoch": 16.63, - "learning_rate": 9.515306122448981e-05, - "loss": 1.2045, + "epoch": 9.29, + "learning_rate": 8.323863636363637e-05, + "loss": 0.5142, "step": 822 }, { - "epoch": 16.65, - "learning_rate": 9.502551020408164e-05, - "loss": 1.1791, + "epoch": 9.3, + "learning_rate": 8.309659090909091e-05, + "loss": 0.5116, "step": 823 }, { - "epoch": 16.67, - "learning_rate": 9.489795918367348e-05, - "loss": 1.1137, + "epoch": 9.31, + "learning_rate": 8.295454545454547e-05, + "loss": 0.4974, "step": 824 }, { - "epoch": 16.69, - "learning_rate": 9.477040816326531e-05, - "loss": 1.1312, + "epoch": 9.32, + "learning_rate": 8.28125e-05, + "loss": 0.5117, "step": 825 }, { - "epoch": 16.71, - "learning_rate": 9.464285714285715e-05, - "loss": 1.1102, + "epoch": 9.33, + "learning_rate": 8.267045454545455e-05, + "loss": 0.5114, "step": 826 }, { - "epoch": 16.73, - "learning_rate": 9.451530612244899e-05, - "loss": 1.1865, + "epoch": 9.34, + "learning_rate": 8.25284090909091e-05, + "loss": 0.5039, "step": 827 }, { - "epoch": 16.75, - "learning_rate": 9.438775510204082e-05, - "loss": 1.1232, + "epoch": 9.35, + "learning_rate": 8.238636363636364e-05, + "loss": 0.498, "step": 828 }, { - "epoch": 16.77, - "learning_rate": 9.426020408163265e-05, - "loss": 1.2068, + "epoch": 9.37, + "learning_rate": 8.224431818181818e-05, + "loss": 0.5042, "step": 829 }, { - "epoch": 16.79, - "learning_rate": 9.41326530612245e-05, - "loss": 1.1864, + "epoch": 9.38, + "learning_rate": 8.210227272727274e-05, + "loss": 0.5049, "step": 830 }, { - "epoch": 16.81, - "learning_rate": 9.400510204081633e-05, - "loss": 1.2195, + "epoch": 9.39, + "learning_rate": 8.196022727272727e-05, + "loss": 0.5123, "step": 831 }, { - "epoch": 16.83, - "learning_rate": 9.387755102040817e-05, - "loss": 1.2063, + "epoch": 9.4, + "learning_rate": 8.181818181818183e-05, + "loss": 0.4907, "step": 832 }, { - "epoch": 16.85, - "learning_rate": 9.375e-05, - "loss": 1.1455, + "epoch": 9.41, + "learning_rate": 8.167613636363637e-05, + "loss": 0.5267, "step": 833 }, { - "epoch": 16.88, - "learning_rate": 9.362244897959183e-05, - "loss": 1.1819, + "epoch": 9.42, + "learning_rate": 8.15340909090909e-05, + "loss": 0.5314, "step": 834 }, { - "epoch": 16.9, - "learning_rate": 9.349489795918368e-05, - "loss": 1.1887, + "epoch": 9.43, + "learning_rate": 8.139204545454546e-05, + "loss": 0.4952, "step": 835 }, { - "epoch": 16.92, - "learning_rate": 9.336734693877551e-05, - "loss": 1.1557, + "epoch": 9.44, + "learning_rate": 8.125000000000001e-05, + "loss": 0.5014, "step": 836 }, { - "epoch": 16.94, - "learning_rate": 9.323979591836735e-05, - "loss": 1.2094, + "epoch": 9.46, + "learning_rate": 8.110795454545454e-05, + "loss": 0.4967, "step": 837 }, { - "epoch": 16.96, - "learning_rate": 9.311224489795918e-05, - "loss": 1.1512, + "epoch": 9.47, + "learning_rate": 8.09659090909091e-05, + "loss": 0.5116, "step": 838 }, { - "epoch": 16.98, - "learning_rate": 9.298469387755103e-05, - "loss": 1.1463, + "epoch": 9.48, + "learning_rate": 8.082386363636365e-05, + "loss": 0.5119, "step": 839 }, { - "epoch": 17.0, - "learning_rate": 9.285714285714286e-05, - "loss": 1.155, + "epoch": 9.49, + "learning_rate": 8.068181818181818e-05, + "loss": 0.4987, "step": 840 }, { - "epoch": 17.02, - "learning_rate": 9.27295918367347e-05, - "loss": 1.1292, + "epoch": 9.5, + "learning_rate": 8.053977272727274e-05, + "loss": 0.5063, "step": 841 }, { - "epoch": 17.04, - "learning_rate": 9.260204081632653e-05, - "loss": 1.0996, + "epoch": 9.51, + "learning_rate": 8.039772727272728e-05, + "loss": 0.5019, "step": 842 }, { - "epoch": 17.06, - "learning_rate": 9.247448979591838e-05, - "loss": 1.0662, + "epoch": 9.52, + "learning_rate": 8.025568181818183e-05, + "loss": 0.5272, "step": 843 }, { - "epoch": 17.08, - "learning_rate": 9.234693877551021e-05, - "loss": 1.0931, + "epoch": 9.54, + "learning_rate": 8.011363636363637e-05, + "loss": 0.4969, "step": 844 }, { - "epoch": 17.1, - "learning_rate": 9.221938775510205e-05, - "loss": 1.0727, + "epoch": 9.55, + "learning_rate": 7.997159090909092e-05, + "loss": 0.5222, "step": 845 }, { - "epoch": 17.12, - "learning_rate": 9.209183673469388e-05, - "loss": 1.1043, + "epoch": 9.56, + "learning_rate": 7.982954545454546e-05, + "loss": 0.4729, "step": 846 }, { - "epoch": 17.14, - "learning_rate": 9.196428571428572e-05, - "loss": 1.0594, + "epoch": 9.57, + "learning_rate": 7.96875e-05, + "loss": 0.4976, "step": 847 }, { - "epoch": 17.16, - "learning_rate": 9.183673469387756e-05, - "loss": 1.0952, + "epoch": 9.58, + "learning_rate": 7.954545454545455e-05, + "loss": 0.4974, "step": 848 }, { - "epoch": 17.18, - "learning_rate": 9.170918367346939e-05, - "loss": 1.0639, + "epoch": 9.59, + "learning_rate": 7.94034090909091e-05, + "loss": 0.4849, "step": 849 }, { - "epoch": 17.2, - "learning_rate": 9.158163265306124e-05, - "loss": 1.132, + "epoch": 9.6, + "learning_rate": 7.926136363636364e-05, + "loss": 0.4897, "step": 850 }, { - "epoch": 17.22, - "learning_rate": 9.145408163265307e-05, - "loss": 1.1083, + "epoch": 9.61, + "learning_rate": 7.911931818181819e-05, + "loss": 0.4962, "step": 851 }, { - "epoch": 17.24, - "learning_rate": 9.13265306122449e-05, - "loss": 1.1282, + "epoch": 9.63, + "learning_rate": 7.897727272727273e-05, + "loss": 0.4877, "step": 852 }, { - "epoch": 17.26, - "learning_rate": 9.119897959183674e-05, - "loss": 1.0474, + "epoch": 9.64, + "learning_rate": 7.883522727272728e-05, + "loss": 0.4921, "step": 853 }, { - "epoch": 17.28, - "learning_rate": 9.107142857142857e-05, - "loss": 1.1138, + "epoch": 9.65, + "learning_rate": 7.869318181818182e-05, + "loss": 0.4969, "step": 854 }, { - "epoch": 17.3, - "learning_rate": 9.094387755102042e-05, - "loss": 1.1025, + "epoch": 9.66, + "learning_rate": 7.855113636363637e-05, + "loss": 0.5045, "step": 855 }, { - "epoch": 17.32, - "learning_rate": 9.081632653061225e-05, - "loss": 1.0968, + "epoch": 9.67, + "learning_rate": 7.840909090909091e-05, + "loss": 0.5207, "step": 856 }, { - "epoch": 17.34, - "learning_rate": 9.068877551020408e-05, - "loss": 1.1683, + "epoch": 9.68, + "learning_rate": 7.826704545454546e-05, + "loss": 0.5098, "step": 857 }, { - "epoch": 17.36, - "learning_rate": 9.056122448979592e-05, - "loss": 1.0975, + "epoch": 9.69, + "learning_rate": 7.8125e-05, + "loss": 0.5005, "step": 858 }, { - "epoch": 17.38, - "learning_rate": 9.043367346938775e-05, - "loss": 1.1274, + "epoch": 9.7, + "learning_rate": 7.798295454545455e-05, + "loss": 0.5028, "step": 859 }, { - "epoch": 17.4, - "learning_rate": 9.030612244897958e-05, - "loss": 1.0916, + "epoch": 9.72, + "learning_rate": 7.784090909090909e-05, + "loss": 0.5067, "step": 860 }, { - "epoch": 17.42, - "learning_rate": 9.017857142857143e-05, - "loss": 1.0912, + "epoch": 9.73, + "learning_rate": 7.769886363636364e-05, + "loss": 0.484, "step": 861 }, { - "epoch": 17.44, - "learning_rate": 9.005102040816327e-05, - "loss": 1.0875, + "epoch": 9.74, + "learning_rate": 7.755681818181818e-05, + "loss": 0.5029, "step": 862 }, { - "epoch": 17.46, - "learning_rate": 8.99234693877551e-05, - "loss": 1.05, + "epoch": 9.75, + "learning_rate": 7.741477272727273e-05, + "loss": 0.5077, "step": 863 }, { - "epoch": 17.48, - "learning_rate": 8.979591836734695e-05, - "loss": 1.1418, + "epoch": 9.76, + "learning_rate": 7.727272727272727e-05, + "loss": 0.5091, "step": 864 }, { - "epoch": 17.5, - "learning_rate": 8.966836734693878e-05, - "loss": 1.0609, + "epoch": 9.77, + "learning_rate": 7.713068181818183e-05, + "loss": 0.4781, "step": 865 }, { - "epoch": 17.52, - "learning_rate": 8.954081632653062e-05, - "loss": 1.1611, + "epoch": 9.78, + "learning_rate": 7.698863636363636e-05, + "loss": 0.5124, "step": 866 }, { - "epoch": 17.54, - "learning_rate": 8.941326530612245e-05, - "loss": 1.1065, + "epoch": 9.79, + "learning_rate": 7.684659090909091e-05, + "loss": 0.4859, "step": 867 }, { - "epoch": 17.56, - "learning_rate": 8.92857142857143e-05, - "loss": 1.1611, + "epoch": 9.81, + "learning_rate": 7.670454545454547e-05, + "loss": 0.4872, "step": 868 }, { - "epoch": 17.58, - "learning_rate": 8.915816326530613e-05, - "loss": 1.1398, + "epoch": 9.82, + "learning_rate": 7.65625e-05, + "loss": 0.4675, "step": 869 }, { - "epoch": 17.6, - "learning_rate": 8.903061224489796e-05, - "loss": 1.1055, + "epoch": 9.83, + "learning_rate": 7.642045454545454e-05, + "loss": 0.5056, "step": 870 }, { - "epoch": 17.62, - "learning_rate": 8.89030612244898e-05, - "loss": 1.1314, + "epoch": 9.84, + "learning_rate": 7.62784090909091e-05, + "loss": 0.4868, "step": 871 }, { - "epoch": 17.64, - "learning_rate": 8.877551020408164e-05, - "loss": 1.1084, + "epoch": 9.85, + "learning_rate": 7.613636363636363e-05, + "loss": 0.4907, "step": 872 }, { - "epoch": 17.66, - "learning_rate": 8.864795918367348e-05, - "loss": 1.1254, + "epoch": 9.86, + "learning_rate": 7.599431818181818e-05, + "loss": 0.474, "step": 873 }, { - "epoch": 17.68, - "learning_rate": 8.852040816326531e-05, - "loss": 1.142, + "epoch": 9.87, + "learning_rate": 7.585227272727274e-05, + "loss": 0.4813, "step": 874 }, { - "epoch": 17.7, - "learning_rate": 8.839285714285714e-05, - "loss": 1.1371, + "epoch": 9.89, + "learning_rate": 7.571022727272727e-05, + "loss": 0.4838, "step": 875 }, { - "epoch": 17.72, - "learning_rate": 8.826530612244899e-05, - "loss": 1.1092, + "epoch": 9.9, + "learning_rate": 7.556818181818183e-05, + "loss": 0.4935, "step": 876 }, { - "epoch": 17.75, - "learning_rate": 8.813775510204082e-05, - "loss": 1.161, + "epoch": 9.91, + "learning_rate": 7.542613636363637e-05, + "loss": 0.4884, "step": 877 }, { - "epoch": 17.77, - "learning_rate": 8.801020408163265e-05, - "loss": 1.1044, + "epoch": 9.92, + "learning_rate": 7.52840909090909e-05, + "loss": 0.4797, "step": 878 }, { - "epoch": 17.79, - "learning_rate": 8.788265306122449e-05, - "loss": 1.117, + "epoch": 9.93, + "learning_rate": 7.514204545454546e-05, + "loss": 0.479, "step": 879 }, { - "epoch": 17.81, - "learning_rate": 8.775510204081632e-05, - "loss": 1.1262, + "epoch": 9.94, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4727, "step": 880 }, { - "epoch": 17.83, - "learning_rate": 8.762755102040817e-05, - "loss": 1.0829, + "epoch": 9.95, + "learning_rate": 7.485795454545454e-05, + "loss": 0.4758, "step": 881 }, { - "epoch": 17.85, - "learning_rate": 8.75e-05, - "loss": 1.1393, + "epoch": 9.96, + "learning_rate": 7.47159090909091e-05, + "loss": 0.482, "step": 882 }, { - "epoch": 17.87, - "learning_rate": 8.737244897959183e-05, - "loss": 1.1781, + "epoch": 9.98, + "learning_rate": 7.457386363636364e-05, + "loss": 0.4951, "step": 883 }, { - "epoch": 17.89, - "learning_rate": 8.724489795918367e-05, - "loss": 1.1582, + "epoch": 9.99, + "learning_rate": 7.443181818181817e-05, + "loss": 0.4823, "step": 884 }, { - "epoch": 17.91, - "learning_rate": 8.711734693877552e-05, - "loss": 1.1469, + "epoch": 10.0, + "learning_rate": 7.428977272727273e-05, + "loss": 0.4638, "step": 885 }, { - "epoch": 17.93, - "learning_rate": 8.698979591836735e-05, - "loss": 1.1494, + "epoch": 10.01, + "learning_rate": 7.414772727272728e-05, + "loss": 0.4715, "step": 886 }, { - "epoch": 17.95, - "learning_rate": 8.68622448979592e-05, - "loss": 1.1251, + "epoch": 10.02, + "learning_rate": 7.400568181818182e-05, + "loss": 0.461, "step": 887 }, { - "epoch": 17.97, - "learning_rate": 8.673469387755102e-05, - "loss": 1.1624, + "epoch": 10.03, + "learning_rate": 7.386363636363637e-05, + "loss": 0.4429, "step": 888 }, { - "epoch": 17.99, - "learning_rate": 8.660714285714287e-05, - "loss": 1.0842, + "epoch": 10.04, + "learning_rate": 7.372159090909091e-05, + "loss": 0.4403, "step": 889 }, { - "epoch": 18.01, - "learning_rate": 8.64795918367347e-05, - "loss": 1.1944, + "epoch": 10.05, + "learning_rate": 7.357954545454546e-05, + "loss": 0.4519, "step": 890 }, { - "epoch": 18.03, - "learning_rate": 8.635204081632653e-05, - "loss": 1.0642, + "epoch": 10.07, + "learning_rate": 7.34375e-05, + "loss": 0.4611, "step": 891 }, { - "epoch": 18.05, - "learning_rate": 8.622448979591838e-05, - "loss": 1.0459, + "epoch": 10.08, + "learning_rate": 7.329545454545455e-05, + "loss": 0.4543, "step": 892 }, { - "epoch": 18.07, - "learning_rate": 8.60969387755102e-05, - "loss": 1.0941, + "epoch": 10.09, + "learning_rate": 7.315340909090909e-05, + "loss": 0.4528, "step": 893 }, { - "epoch": 18.09, - "learning_rate": 8.596938775510205e-05, - "loss": 1.0457, + "epoch": 10.1, + "learning_rate": 7.301136363636364e-05, + "loss": 0.4586, "step": 894 }, { - "epoch": 18.11, - "learning_rate": 8.584183673469388e-05, - "loss": 1.1033, + "epoch": 10.11, + "learning_rate": 7.286931818181818e-05, + "loss": 0.4418, "step": 895 }, { - "epoch": 18.13, - "learning_rate": 8.571428571428571e-05, - "loss": 1.0756, + "epoch": 10.12, + "learning_rate": 7.272727272727273e-05, + "loss": 0.4435, "step": 896 }, { - "epoch": 18.15, - "learning_rate": 8.558673469387756e-05, - "loss": 1.0615, + "epoch": 10.13, + "learning_rate": 7.258522727272727e-05, + "loss": 0.44, "step": 897 }, { - "epoch": 18.17, - "learning_rate": 8.545918367346939e-05, - "loss": 1.0828, + "epoch": 10.15, + "learning_rate": 7.244318181818183e-05, + "loss": 0.4589, "step": 898 }, { - "epoch": 18.19, - "learning_rate": 8.533163265306123e-05, - "loss": 1.1158, + "epoch": 10.16, + "learning_rate": 7.230113636363636e-05, + "loss": 0.4597, "step": 899 }, { - "epoch": 18.21, - "learning_rate": 8.520408163265306e-05, - "loss": 1.0133, + "epoch": 10.17, + "learning_rate": 7.215909090909091e-05, + "loss": 0.4479, "step": 900 }, { - "epoch": 18.23, - "learning_rate": 8.50765306122449e-05, - "loss": 1.0437, + "epoch": 10.18, + "learning_rate": 7.201704545454547e-05, + "loss": 0.4477, "step": 901 }, { - "epoch": 18.25, - "learning_rate": 8.494897959183674e-05, - "loss": 1.0372, + "epoch": 10.19, + "learning_rate": 7.1875e-05, + "loss": 0.446, "step": 902 }, { - "epoch": 18.27, - "learning_rate": 8.482142857142857e-05, - "loss": 1.1012, + "epoch": 10.2, + "learning_rate": 7.173295454545454e-05, + "loss": 0.4546, "step": 903 }, { - "epoch": 18.29, - "learning_rate": 8.469387755102041e-05, - "loss": 1.0777, + "epoch": 10.21, + "learning_rate": 7.15909090909091e-05, + "loss": 0.4347, "step": 904 }, { - "epoch": 18.31, - "learning_rate": 8.456632653061224e-05, - "loss": 1.0799, + "epoch": 10.22, + "learning_rate": 7.144886363636363e-05, + "loss": 0.452, "step": 905 }, { - "epoch": 18.33, - "learning_rate": 8.443877551020409e-05, - "loss": 0.9846, + "epoch": 10.24, + "learning_rate": 7.130681818181818e-05, + "loss": 0.4536, "step": 906 }, { - "epoch": 18.35, - "learning_rate": 8.431122448979592e-05, - "loss": 1.1, + "epoch": 10.25, + "learning_rate": 7.116477272727274e-05, + "loss": 0.4492, "step": 907 }, { - "epoch": 18.37, - "learning_rate": 8.418367346938776e-05, - "loss": 1.0787, + "epoch": 10.26, + "learning_rate": 7.102272727272727e-05, + "loss": 0.4401, "step": 908 }, { - "epoch": 18.39, - "learning_rate": 8.40561224489796e-05, - "loss": 1.0647, + "epoch": 10.27, + "learning_rate": 7.088068181818183e-05, + "loss": 0.4609, "step": 909 }, { - "epoch": 18.41, - "learning_rate": 8.392857142857144e-05, - "loss": 1.056, + "epoch": 10.28, + "learning_rate": 7.073863636363637e-05, + "loss": 0.4544, "step": 910 }, { - "epoch": 18.43, - "learning_rate": 8.380102040816327e-05, - "loss": 1.1131, + "epoch": 10.29, + "learning_rate": 7.05965909090909e-05, + "loss": 0.4477, "step": 911 }, { - "epoch": 18.45, - "learning_rate": 8.367346938775511e-05, - "loss": 1.0825, + "epoch": 10.3, + "learning_rate": 7.045454545454546e-05, + "loss": 0.4445, "step": 912 }, { - "epoch": 18.47, - "learning_rate": 8.354591836734695e-05, - "loss": 1.0681, + "epoch": 10.31, + "learning_rate": 7.031250000000001e-05, + "loss": 0.4544, "step": 913 }, { - "epoch": 18.49, - "learning_rate": 8.341836734693878e-05, - "loss": 1.0479, + "epoch": 10.33, + "learning_rate": 7.017045454545454e-05, + "loss": 0.4634, "step": 914 }, { - "epoch": 18.51, - "learning_rate": 8.329081632653062e-05, - "loss": 1.0921, + "epoch": 10.34, + "learning_rate": 7.00284090909091e-05, + "loss": 0.4499, "step": 915 }, { - "epoch": 18.53, - "learning_rate": 8.316326530612245e-05, - "loss": 1.0626, + "epoch": 10.35, + "learning_rate": 6.988636363636364e-05, + "loss": 0.4354, "step": 916 }, { - "epoch": 18.55, - "learning_rate": 8.30357142857143e-05, - "loss": 1.0518, + "epoch": 10.36, + "learning_rate": 6.974431818181818e-05, + "loss": 0.454, "step": 917 }, { - "epoch": 18.57, - "learning_rate": 8.290816326530613e-05, - "loss": 1.0557, + "epoch": 10.37, + "learning_rate": 6.960227272727273e-05, + "loss": 0.4473, "step": 918 }, { - "epoch": 18.6, - "learning_rate": 8.278061224489796e-05, - "loss": 1.0831, + "epoch": 10.38, + "learning_rate": 6.946022727272728e-05, + "loss": 0.4347, "step": 919 }, { - "epoch": 18.62, - "learning_rate": 8.26530612244898e-05, - "loss": 1.0307, + "epoch": 10.39, + "learning_rate": 6.931818181818182e-05, + "loss": 0.441, "step": 920 }, { - "epoch": 18.64, - "learning_rate": 8.252551020408163e-05, - "loss": 1.0455, + "epoch": 10.4, + "learning_rate": 6.917613636363637e-05, + "loss": 0.4545, "step": 921 }, { - "epoch": 18.66, - "learning_rate": 8.239795918367348e-05, - "loss": 1.0667, + "epoch": 10.42, + "learning_rate": 6.903409090909091e-05, + "loss": 0.458, "step": 922 }, { - "epoch": 18.68, - "learning_rate": 8.227040816326531e-05, - "loss": 1.0736, + "epoch": 10.43, + "learning_rate": 6.889204545454546e-05, + "loss": 0.4381, "step": 923 }, { - "epoch": 18.7, - "learning_rate": 8.214285714285714e-05, - "loss": 1.0108, + "epoch": 10.44, + "learning_rate": 6.875e-05, + "loss": 0.441, "step": 924 }, { - "epoch": 18.72, - "learning_rate": 8.201530612244898e-05, - "loss": 1.0458, + "epoch": 10.45, + "learning_rate": 6.860795454545455e-05, + "loss": 0.4446, "step": 925 }, { - "epoch": 18.74, - "learning_rate": 8.188775510204081e-05, - "loss": 1.0852, + "epoch": 10.46, + "learning_rate": 6.84659090909091e-05, + "loss": 0.4548, "step": 926 }, { - "epoch": 18.76, - "learning_rate": 8.176020408163265e-05, - "loss": 1.1207, + "epoch": 10.47, + "learning_rate": 6.832386363636364e-05, + "loss": 0.4404, "step": 927 }, { - "epoch": 18.78, - "learning_rate": 8.163265306122449e-05, - "loss": 1.0914, + "epoch": 10.48, + "learning_rate": 6.818181818181818e-05, + "loss": 0.4446, "step": 928 }, { - "epoch": 18.8, - "learning_rate": 8.150510204081633e-05, - "loss": 1.1108, + "epoch": 10.5, + "learning_rate": 6.803977272727273e-05, + "loss": 0.4434, "step": 929 }, { - "epoch": 18.82, - "learning_rate": 8.137755102040817e-05, - "loss": 1.1394, + "epoch": 10.51, + "learning_rate": 6.789772727272727e-05, + "loss": 0.4778, "step": 930 }, { - "epoch": 18.84, - "learning_rate": 8.125000000000001e-05, - "loss": 1.029, + "epoch": 10.52, + "learning_rate": 6.775568181818182e-05, + "loss": 0.4356, "step": 931 }, { - "epoch": 18.86, - "learning_rate": 8.112244897959184e-05, - "loss": 1.0661, + "epoch": 10.53, + "learning_rate": 6.761363636363636e-05, + "loss": 0.4464, "step": 932 }, { - "epoch": 18.88, - "learning_rate": 8.099489795918369e-05, - "loss": 1.0303, + "epoch": 10.54, + "learning_rate": 6.747159090909091e-05, + "loss": 0.4387, "step": 933 }, { - "epoch": 18.9, - "learning_rate": 8.086734693877552e-05, - "loss": 1.1144, + "epoch": 10.55, + "learning_rate": 6.732954545454547e-05, + "loss": 0.456, "step": 934 }, { - "epoch": 18.92, - "learning_rate": 8.073979591836736e-05, - "loss": 1.1096, + "epoch": 10.56, + "learning_rate": 6.71875e-05, + "loss": 0.453, "step": 935 }, { - "epoch": 18.94, - "learning_rate": 8.061224489795919e-05, - "loss": 1.123, + "epoch": 10.57, + "learning_rate": 6.704545454545455e-05, + "loss": 0.4611, "step": 936 }, { - "epoch": 18.96, - "learning_rate": 8.048469387755102e-05, - "loss": 1.1002, + "epoch": 10.59, + "learning_rate": 6.69034090909091e-05, + "loss": 0.4354, "step": 937 }, { - "epoch": 18.98, - "learning_rate": 8.035714285714287e-05, - "loss": 1.1016, + "epoch": 10.6, + "learning_rate": 6.676136363636364e-05, + "loss": 0.4519, "step": 938 }, { - "epoch": 19.0, - "learning_rate": 8.02295918367347e-05, - "loss": 1.0847, + "epoch": 10.61, + "learning_rate": 6.661931818181818e-05, + "loss": 0.4435, "step": 939 }, { - "epoch": 19.02, - "learning_rate": 8.010204081632653e-05, - "loss": 1.1029, + "epoch": 10.62, + "learning_rate": 6.647727272727274e-05, + "loss": 0.4422, "step": 940 }, { - "epoch": 19.04, - "learning_rate": 7.997448979591837e-05, - "loss": 1.041, + "epoch": 10.63, + "learning_rate": 6.633522727272727e-05, + "loss": 0.4344, "step": 941 }, { - "epoch": 19.06, - "learning_rate": 7.98469387755102e-05, - "loss": 1.01, + "epoch": 10.64, + "learning_rate": 6.619318181818183e-05, + "loss": 0.4419, "step": 942 }, { - "epoch": 19.08, - "learning_rate": 7.971938775510205e-05, - "loss": 1.0197, + "epoch": 10.65, + "learning_rate": 6.605113636363637e-05, + "loss": 0.4308, "step": 943 }, { - "epoch": 19.1, - "learning_rate": 7.959183673469388e-05, - "loss": 1.0543, + "epoch": 10.66, + "learning_rate": 6.59090909090909e-05, + "loss": 0.4043, "step": 944 }, { - "epoch": 19.12, - "learning_rate": 7.946428571428571e-05, - "loss": 1.0369, + "epoch": 10.68, + "learning_rate": 6.576704545454546e-05, + "loss": 0.4626, "step": 945 }, { - "epoch": 19.14, - "learning_rate": 7.933673469387755e-05, - "loss": 1.0154, + "epoch": 10.69, + "learning_rate": 6.562500000000001e-05, + "loss": 0.4365, "step": 946 }, { - "epoch": 19.16, - "learning_rate": 7.920918367346939e-05, - "loss": 0.9546, + "epoch": 10.7, + "learning_rate": 6.548295454545454e-05, + "loss": 0.4397, "step": 947 }, { - "epoch": 19.18, - "learning_rate": 7.908163265306123e-05, - "loss": 0.9982, + "epoch": 10.71, + "learning_rate": 6.53409090909091e-05, + "loss": 0.4463, "step": 948 }, { - "epoch": 19.2, - "learning_rate": 7.895408163265306e-05, - "loss": 1.0748, + "epoch": 10.72, + "learning_rate": 6.519886363636364e-05, + "loss": 0.4394, "step": 949 }, { - "epoch": 19.22, - "learning_rate": 7.882653061224489e-05, - "loss": 1.0562, + "epoch": 10.73, + "learning_rate": 6.505681818181818e-05, + "loss": 0.45, "step": 950 }, { - "epoch": 19.24, - "learning_rate": 7.869897959183674e-05, - "loss": 1.0352, + "epoch": 10.74, + "learning_rate": 6.491477272727273e-05, + "loss": 0.4363, "step": 951 }, { - "epoch": 19.26, - "learning_rate": 7.857142857142858e-05, - "loss": 0.9976, + "epoch": 10.76, + "learning_rate": 6.477272727272728e-05, + "loss": 0.4566, "step": 952 }, { - "epoch": 19.28, - "learning_rate": 7.844387755102041e-05, - "loss": 1.0221, + "epoch": 10.77, + "learning_rate": 6.463068181818183e-05, + "loss": 0.4235, "step": 953 }, { - "epoch": 19.3, - "learning_rate": 7.831632653061226e-05, - "loss": 1.0119, + "epoch": 10.78, + "learning_rate": 6.448863636363637e-05, + "loss": 0.4458, "step": 954 }, { - "epoch": 19.32, - "learning_rate": 7.818877551020409e-05, - "loss": 1.0657, + "epoch": 10.79, + "learning_rate": 6.434659090909092e-05, + "loss": 0.423, "step": 955 }, { - "epoch": 19.34, - "learning_rate": 7.806122448979593e-05, - "loss": 0.9591, + "epoch": 10.8, + "learning_rate": 6.420454545454546e-05, + "loss": 0.445, "step": 956 }, { - "epoch": 19.36, - "learning_rate": 7.793367346938776e-05, - "loss": 1.0101, + "epoch": 10.81, + "learning_rate": 6.40625e-05, + "loss": 0.424, "step": 957 }, { - "epoch": 19.38, - "learning_rate": 7.780612244897959e-05, - "loss": 1.0453, + "epoch": 10.82, + "learning_rate": 6.392045454545455e-05, + "loss": 0.4224, "step": 958 }, { - "epoch": 19.4, - "learning_rate": 7.767857142857144e-05, - "loss": 1.0461, + "epoch": 10.83, + "learning_rate": 6.37784090909091e-05, + "loss": 0.4223, "step": 959 }, { - "epoch": 19.42, - "learning_rate": 7.755102040816327e-05, - "loss": 1.0959, + "epoch": 10.85, + "learning_rate": 6.363636363636364e-05, + "loss": 0.4314, "step": 960 }, { - "epoch": 19.44, - "learning_rate": 7.742346938775511e-05, - "loss": 1.0608, + "epoch": 10.86, + "learning_rate": 6.349431818181819e-05, + "loss": 0.4488, "step": 961 }, { - "epoch": 19.47, - "learning_rate": 7.729591836734694e-05, - "loss": 1.1177, + "epoch": 10.87, + "learning_rate": 6.335227272727273e-05, + "loss": 0.423, "step": 962 }, { - "epoch": 19.49, - "learning_rate": 7.716836734693877e-05, - "loss": 1.0354, + "epoch": 10.88, + "learning_rate": 6.321022727272728e-05, + "loss": 0.4416, "step": 963 }, { - "epoch": 19.51, - "learning_rate": 7.704081632653062e-05, - "loss": 1.0507, + "epoch": 10.89, + "learning_rate": 6.306818181818182e-05, + "loss": 0.423, "step": 964 }, { - "epoch": 19.53, - "learning_rate": 7.691326530612245e-05, - "loss": 1.0313, + "epoch": 10.9, + "learning_rate": 6.292613636363637e-05, + "loss": 0.4502, "step": 965 }, { - "epoch": 19.55, - "learning_rate": 7.67857142857143e-05, - "loss": 1.0569, + "epoch": 10.91, + "learning_rate": 6.278409090909091e-05, + "loss": 0.4266, "step": 966 }, { - "epoch": 19.57, - "learning_rate": 7.665816326530612e-05, - "loss": 1.0862, + "epoch": 10.92, + "learning_rate": 6.264204545454546e-05, + "loss": 0.4344, "step": 967 }, { - "epoch": 19.59, - "learning_rate": 7.653061224489796e-05, - "loss": 1.0593, + "epoch": 10.94, + "learning_rate": 6.25e-05, + "loss": 0.434, "step": 968 }, { - "epoch": 19.61, - "learning_rate": 7.64030612244898e-05, - "loss": 1.0602, + "epoch": 10.95, + "learning_rate": 6.235795454545455e-05, + "loss": 0.4269, "step": 969 }, { - "epoch": 19.63, - "learning_rate": 7.627551020408163e-05, - "loss": 1.0048, + "epoch": 10.96, + "learning_rate": 6.221590909090909e-05, + "loss": 0.4158, "step": 970 }, { - "epoch": 19.65, - "learning_rate": 7.614795918367347e-05, - "loss": 1.0346, + "epoch": 10.97, + "learning_rate": 6.207386363636364e-05, + "loss": 0.4231, "step": 971 }, { - "epoch": 19.67, - "learning_rate": 7.60204081632653e-05, - "loss": 1.0172, + "epoch": 10.98, + "learning_rate": 6.193181818181818e-05, + "loss": 0.4235, "step": 972 }, { - "epoch": 19.69, - "learning_rate": 7.589285714285714e-05, - "loss": 1.02, + "epoch": 10.99, + "learning_rate": 6.178977272727273e-05, + "loss": 0.4504, "step": 973 }, { - "epoch": 19.71, - "learning_rate": 7.576530612244898e-05, - "loss": 1.0028, + "epoch": 11.0, + "learning_rate": 6.164772727272727e-05, + "loss": 0.4394, "step": 974 }, { - "epoch": 19.73, - "learning_rate": 7.563775510204083e-05, - "loss": 1.08, + "epoch": 11.02, + "learning_rate": 6.150568181818183e-05, + "loss": 0.4333, "step": 975 }, { - "epoch": 19.75, - "learning_rate": 7.551020408163266e-05, - "loss": 1.0402, + "epoch": 11.03, + "learning_rate": 6.136363636363636e-05, + "loss": 0.3936, "step": 976 }, { - "epoch": 19.77, - "learning_rate": 7.53826530612245e-05, - "loss": 1.0567, + "epoch": 11.04, + "learning_rate": 6.122159090909091e-05, + "loss": 0.3933, "step": 977 }, { - "epoch": 19.79, - "learning_rate": 7.525510204081633e-05, - "loss": 1.0169, + "epoch": 11.05, + "learning_rate": 6.107954545454547e-05, + "loss": 0.4161, "step": 978 }, { - "epoch": 19.81, - "learning_rate": 7.512755102040818e-05, - "loss": 0.9881, + "epoch": 11.06, + "learning_rate": 6.0937500000000004e-05, + "loss": 0.4097, "step": 979 }, { - "epoch": 19.83, - "learning_rate": 7.500000000000001e-05, - "loss": 1.0677, + "epoch": 11.07, + "learning_rate": 6.079545454545454e-05, + "loss": 0.412, "step": 980 }, { - "epoch": 19.85, - "learning_rate": 7.487244897959184e-05, - "loss": 1.1026, + "epoch": 11.08, + "learning_rate": 6.0653409090909094e-05, + "loss": 0.4104, "step": 981 }, { - "epoch": 19.87, - "learning_rate": 7.474489795918368e-05, - "loss": 1.0101, + "epoch": 11.09, + "learning_rate": 6.051136363636364e-05, + "loss": 0.4152, "step": 982 }, { - "epoch": 19.89, - "learning_rate": 7.461734693877551e-05, - "loss": 1.069, + "epoch": 11.11, + "learning_rate": 6.036931818181818e-05, + "loss": 0.4037, "step": 983 }, { - "epoch": 19.91, - "learning_rate": 7.448979591836736e-05, - "loss": 1.0493, + "epoch": 11.12, + "learning_rate": 6.022727272727273e-05, + "loss": 0.413, "step": 984 }, { - "epoch": 19.93, - "learning_rate": 7.436224489795919e-05, - "loss": 1.0858, + "epoch": 11.13, + "learning_rate": 6.0085227272727274e-05, + "loss": 0.4413, "step": 985 }, { - "epoch": 19.95, - "learning_rate": 7.423469387755102e-05, - "loss": 1.0734, + "epoch": 11.14, + "learning_rate": 5.9943181818181826e-05, + "loss": 0.3908, "step": 986 }, { - "epoch": 19.97, - "learning_rate": 7.410714285714286e-05, - "loss": 1.0203, + "epoch": 11.15, + "learning_rate": 5.9801136363636365e-05, + "loss": 0.3982, "step": 987 }, { - "epoch": 19.99, - "learning_rate": 7.39795918367347e-05, - "loss": 1.0285, + "epoch": 11.16, + "learning_rate": 5.965909090909091e-05, + "loss": 0.4109, "step": 988 }, { - "epoch": 20.01, - "learning_rate": 7.385204081632653e-05, - "loss": 0.9446, + "epoch": 11.17, + "learning_rate": 5.951704545454546e-05, + "loss": 0.3923, "step": 989 }, { - "epoch": 20.03, - "learning_rate": 7.372448979591837e-05, - "loss": 0.9915, + "epoch": 11.18, + "learning_rate": 5.9375e-05, + "loss": 0.4107, "step": 990 }, { - "epoch": 20.05, - "learning_rate": 7.35969387755102e-05, - "loss": 0.9882, + "epoch": 11.2, + "learning_rate": 5.9232954545454545e-05, + "loss": 0.4099, "step": 991 }, { - "epoch": 20.07, - "learning_rate": 7.346938775510205e-05, - "loss": 0.9338, + "epoch": 11.21, + "learning_rate": 5.90909090909091e-05, + "loss": 0.4163, "step": 992 }, { - "epoch": 20.09, - "learning_rate": 7.334183673469388e-05, - "loss": 0.942, + "epoch": 11.22, + "learning_rate": 5.8948863636363635e-05, + "loss": 0.4189, "step": 993 }, { - "epoch": 20.11, - "learning_rate": 7.321428571428571e-05, - "loss": 0.9725, + "epoch": 11.23, + "learning_rate": 5.880681818181818e-05, + "loss": 0.3889, "step": 994 }, { - "epoch": 20.13, - "learning_rate": 7.308673469387755e-05, - "loss": 1.027, + "epoch": 11.24, + "learning_rate": 5.866477272727273e-05, + "loss": 0.3988, "step": 995 }, { - "epoch": 20.15, - "learning_rate": 7.29591836734694e-05, - "loss": 1.0081, + "epoch": 11.25, + "learning_rate": 5.852272727272727e-05, + "loss": 0.4215, "step": 996 }, { - "epoch": 20.17, - "learning_rate": 7.283163265306123e-05, - "loss": 1.0117, + "epoch": 11.26, + "learning_rate": 5.838068181818183e-05, + "loss": 0.4207, "step": 997 }, { - "epoch": 20.19, - "learning_rate": 7.270408163265307e-05, - "loss": 0.969, + "epoch": 11.27, + "learning_rate": 5.823863636363637e-05, + "loss": 0.413, "step": 998 }, { - "epoch": 20.21, - "learning_rate": 7.25765306122449e-05, - "loss": 1.0024, + "epoch": 11.29, + "learning_rate": 5.8096590909090906e-05, + "loss": 0.4057, "step": 999 }, { - "epoch": 20.23, - "learning_rate": 7.244897959183675e-05, - "loss": 0.994, + "epoch": 11.3, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.3939, "step": 1000 }, { - "epoch": 20.25, - "learning_rate": 7.232142857142858e-05, - "loss": 1.0248, + "epoch": 11.31, + "learning_rate": 5.78125e-05, + "loss": 0.4199, "step": 1001 }, { - "epoch": 20.27, - "learning_rate": 7.219387755102042e-05, - "loss": 1.0493, + "epoch": 11.32, + "learning_rate": 5.767045454545454e-05, + "loss": 0.4076, "step": 1002 }, { - "epoch": 20.29, - "learning_rate": 7.206632653061225e-05, - "loss": 1.0011, + "epoch": 11.33, + "learning_rate": 5.75284090909091e-05, + "loss": 0.4079, "step": 1003 }, { - "epoch": 20.31, - "learning_rate": 7.193877551020408e-05, - "loss": 0.9874, + "epoch": 11.34, + "learning_rate": 5.738636363636364e-05, + "loss": 0.4002, "step": 1004 }, { - "epoch": 20.34, - "learning_rate": 7.181122448979593e-05, - "loss": 1.0049, + "epoch": 11.35, + "learning_rate": 5.724431818181818e-05, + "loss": 0.3801, "step": 1005 }, { - "epoch": 20.36, - "learning_rate": 7.168367346938776e-05, - "loss": 1.0314, + "epoch": 11.37, + "learning_rate": 5.7102272727272735e-05, + "loss": 0.3939, "step": 1006 }, { - "epoch": 20.38, - "learning_rate": 7.155612244897959e-05, - "loss": 0.9742, + "epoch": 11.38, + "learning_rate": 5.696022727272727e-05, + "loss": 0.3904, "step": 1007 }, { - "epoch": 20.4, - "learning_rate": 7.142857142857143e-05, - "loss": 1.0621, + "epoch": 11.39, + "learning_rate": 5.6818181818181825e-05, + "loss": 0.406, "step": 1008 }, { - "epoch": 20.42, - "learning_rate": 7.130102040816326e-05, - "loss": 0.9672, + "epoch": 11.4, + "learning_rate": 5.667613636363637e-05, + "loss": 0.4185, "step": 1009 }, { - "epoch": 20.44, - "learning_rate": 7.117346938775511e-05, - "loss": 1.0018, + "epoch": 11.41, + "learning_rate": 5.653409090909091e-05, + "loss": 0.3976, "step": 1010 }, { - "epoch": 20.46, - "learning_rate": 7.104591836734694e-05, - "loss": 1.0045, + "epoch": 11.42, + "learning_rate": 5.639204545454546e-05, + "loss": 0.3907, "step": 1011 }, { - "epoch": 20.48, - "learning_rate": 7.091836734693877e-05, - "loss": 0.9675, + "epoch": 11.43, + "learning_rate": 5.6250000000000005e-05, + "loss": 0.4065, "step": 1012 }, { - "epoch": 20.5, - "learning_rate": 7.079081632653062e-05, - "loss": 0.976, + "epoch": 11.44, + "learning_rate": 5.6107954545454544e-05, + "loss": 0.4069, "step": 1013 }, { - "epoch": 20.52, - "learning_rate": 7.066326530612245e-05, - "loss": 1.0523, + "epoch": 11.46, + "learning_rate": 5.5965909090909095e-05, + "loss": 0.3964, "step": 1014 }, { - "epoch": 20.54, - "learning_rate": 7.053571428571429e-05, - "loss": 1.052, + "epoch": 11.47, + "learning_rate": 5.582386363636364e-05, + "loss": 0.3912, "step": 1015 }, { - "epoch": 20.56, - "learning_rate": 7.040816326530612e-05, - "loss": 0.9903, + "epoch": 11.48, + "learning_rate": 5.568181818181818e-05, + "loss": 0.3944, "step": 1016 }, { - "epoch": 20.58, - "learning_rate": 7.028061224489795e-05, - "loss": 1.0337, + "epoch": 11.49, + "learning_rate": 5.553977272727273e-05, + "loss": 0.4197, "step": 1017 }, { - "epoch": 20.6, - "learning_rate": 7.01530612244898e-05, - "loss": 1.1122, + "epoch": 11.5, + "learning_rate": 5.5397727272727276e-05, + "loss": 0.4064, "step": 1018 }, { - "epoch": 20.62, - "learning_rate": 7.002551020408164e-05, - "loss": 1.0133, + "epoch": 11.51, + "learning_rate": 5.525568181818183e-05, + "loss": 0.4054, "step": 1019 }, { - "epoch": 20.64, - "learning_rate": 6.989795918367347e-05, - "loss": 0.9588, + "epoch": 11.52, + "learning_rate": 5.5113636363636366e-05, + "loss": 0.4128, "step": 1020 }, { - "epoch": 20.66, - "learning_rate": 6.977040816326532e-05, - "loss": 0.9892, + "epoch": 11.53, + "learning_rate": 5.497159090909091e-05, + "loss": 0.3976, "step": 1021 }, { - "epoch": 20.68, - "learning_rate": 6.964285714285715e-05, - "loss": 1.025, + "epoch": 11.55, + "learning_rate": 5.482954545454546e-05, + "loss": 0.3863, "step": 1022 }, { - "epoch": 20.7, - "learning_rate": 6.951530612244899e-05, - "loss": 1.0196, + "epoch": 11.56, + "learning_rate": 5.46875e-05, + "loss": 0.3994, "step": 1023 }, { - "epoch": 20.72, - "learning_rate": 6.938775510204082e-05, - "loss": 1.0146, + "epoch": 11.57, + "learning_rate": 5.4545454545454546e-05, + "loss": 0.401, "step": 1024 }, { - "epoch": 20.74, - "learning_rate": 6.926020408163265e-05, - "loss": 1.0656, + "epoch": 11.58, + "learning_rate": 5.44034090909091e-05, + "loss": 0.3948, "step": 1025 }, { - "epoch": 20.76, - "learning_rate": 6.91326530612245e-05, - "loss": 0.9584, + "epoch": 11.59, + "learning_rate": 5.4261363636363636e-05, + "loss": 0.3967, "step": 1026 }, { - "epoch": 20.78, - "learning_rate": 6.900510204081633e-05, - "loss": 0.9877, + "epoch": 11.6, + "learning_rate": 5.411931818181818e-05, + "loss": 0.413, "step": 1027 }, { - "epoch": 20.8, - "learning_rate": 6.887755102040817e-05, - "loss": 1.0607, + "epoch": 11.61, + "learning_rate": 5.397727272727273e-05, + "loss": 0.4032, "step": 1028 }, { - "epoch": 20.82, - "learning_rate": 6.875e-05, - "loss": 0.9969, + "epoch": 11.63, + "learning_rate": 5.383522727272727e-05, + "loss": 0.3905, "step": 1029 }, { - "epoch": 20.84, - "learning_rate": 6.862244897959184e-05, - "loss": 0.9506, + "epoch": 11.64, + "learning_rate": 5.3693181818181823e-05, + "loss": 0.4041, "step": 1030 }, { - "epoch": 20.86, - "learning_rate": 6.849489795918368e-05, - "loss": 1.0576, + "epoch": 11.65, + "learning_rate": 5.355113636363637e-05, + "loss": 0.392, "step": 1031 }, { - "epoch": 20.88, - "learning_rate": 6.836734693877551e-05, - "loss": 1.0094, + "epoch": 11.66, + "learning_rate": 5.340909090909091e-05, + "loss": 0.3942, "step": 1032 }, { - "epoch": 20.9, - "learning_rate": 6.823979591836735e-05, - "loss": 0.9872, + "epoch": 11.67, + "learning_rate": 5.326704545454546e-05, + "loss": 0.3946, "step": 1033 }, { - "epoch": 20.92, - "learning_rate": 6.811224489795919e-05, - "loss": 1.0544, + "epoch": 11.68, + "learning_rate": 5.3125000000000004e-05, + "loss": 0.3989, "step": 1034 }, { - "epoch": 20.94, - "learning_rate": 6.798469387755102e-05, - "loss": 1.0194, + "epoch": 11.69, + "learning_rate": 5.298295454545454e-05, + "loss": 0.4101, "step": 1035 }, { - "epoch": 20.96, - "learning_rate": 6.785714285714286e-05, - "loss": 1.0009, + "epoch": 11.7, + "learning_rate": 5.2840909090909094e-05, + "loss": 0.4033, "step": 1036 }, { - "epoch": 20.98, - "learning_rate": 6.772959183673469e-05, - "loss": 0.9727, + "epoch": 11.72, + "learning_rate": 5.269886363636364e-05, + "loss": 0.3937, "step": 1037 }, { - "epoch": 21.0, - "learning_rate": 6.760204081632652e-05, - "loss": 0.9754, + "epoch": 11.73, + "learning_rate": 5.255681818181818e-05, + "loss": 0.3873, "step": 1038 }, { - "epoch": 21.02, - "learning_rate": 6.747448979591837e-05, - "loss": 0.9953, + "epoch": 11.74, + "learning_rate": 5.241477272727273e-05, + "loss": 0.3922, "step": 1039 }, { - "epoch": 21.04, - "learning_rate": 6.73469387755102e-05, - "loss": 0.9307, + "epoch": 11.75, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.4016, "step": 1040 }, { - "epoch": 21.06, - "learning_rate": 6.721938775510204e-05, - "loss": 0.9151, + "epoch": 11.76, + "learning_rate": 5.2130681818181826e-05, + "loss": 0.3892, "step": 1041 }, { - "epoch": 21.08, - "learning_rate": 6.709183673469389e-05, - "loss": 0.9474, + "epoch": 11.77, + "learning_rate": 5.1988636363636364e-05, + "loss": 0.3974, "step": 1042 }, { - "epoch": 21.1, - "learning_rate": 6.696428571428572e-05, - "loss": 0.9697, + "epoch": 11.78, + "learning_rate": 5.184659090909091e-05, + "loss": 0.4024, "step": 1043 }, { - "epoch": 21.12, - "learning_rate": 6.683673469387756e-05, - "loss": 0.9423, + "epoch": 11.79, + "learning_rate": 5.170454545454546e-05, + "loss": 0.3889, "step": 1044 }, { - "epoch": 21.14, - "learning_rate": 6.670918367346939e-05, - "loss": 0.9797, + "epoch": 11.81, + "learning_rate": 5.15625e-05, + "loss": 0.4097, "step": 1045 }, { - "epoch": 21.16, - "learning_rate": 6.658163265306124e-05, - "loss": 0.919, + "epoch": 11.82, + "learning_rate": 5.1420454545454545e-05, + "loss": 0.4047, "step": 1046 }, { - "epoch": 21.18, - "learning_rate": 6.645408163265307e-05, - "loss": 0.9743, + "epoch": 11.83, + "learning_rate": 5.12784090909091e-05, + "loss": 0.4033, "step": 1047 }, { - "epoch": 21.21, - "learning_rate": 6.63265306122449e-05, - "loss": 0.9575, + "epoch": 11.84, + "learning_rate": 5.1136363636363635e-05, + "loss": 0.3774, "step": 1048 }, { - "epoch": 21.23, - "learning_rate": 6.619897959183674e-05, - "loss": 0.9861, + "epoch": 11.85, + "learning_rate": 5.099431818181818e-05, + "loss": 0.405, "step": 1049 }, { - "epoch": 21.25, - "learning_rate": 6.607142857142857e-05, - "loss": 0.9103, + "epoch": 11.86, + "learning_rate": 5.085227272727273e-05, + "loss": 0.3996, "step": 1050 }, { - "epoch": 21.27, - "learning_rate": 6.594387755102042e-05, - "loss": 0.993, + "epoch": 11.87, + "learning_rate": 5.071022727272727e-05, + "loss": 0.3885, "step": 1051 }, { - "epoch": 21.29, - "learning_rate": 6.581632653061225e-05, - "loss": 0.9668, + "epoch": 11.88, + "learning_rate": 5.056818181818183e-05, + "loss": 0.3914, "step": 1052 }, { - "epoch": 21.31, - "learning_rate": 6.568877551020408e-05, - "loss": 1.0008, + "epoch": 11.9, + "learning_rate": 5.042613636363637e-05, + "loss": 0.3908, "step": 1053 }, { - "epoch": 21.33, - "learning_rate": 6.556122448979592e-05, - "loss": 0.9825, + "epoch": 11.91, + "learning_rate": 5.0284090909090905e-05, + "loss": 0.3921, "step": 1054 }, { - "epoch": 21.35, - "learning_rate": 6.543367346938776e-05, - "loss": 1.0174, + "epoch": 11.92, + "learning_rate": 5.0142045454545464e-05, + "loss": 0.4077, "step": 1055 }, { - "epoch": 21.37, - "learning_rate": 6.530612244897959e-05, - "loss": 0.9685, + "epoch": 11.93, + "learning_rate": 5e-05, + "loss": 0.3973, "step": 1056 }, { - "epoch": 21.39, - "learning_rate": 6.517857142857143e-05, - "loss": 0.9265, + "epoch": 11.94, + "learning_rate": 4.985795454545455e-05, + "loss": 0.3986, "step": 1057 }, { - "epoch": 21.41, - "learning_rate": 6.505102040816326e-05, - "loss": 0.9495, + "epoch": 11.95, + "learning_rate": 4.971590909090909e-05, + "loss": 0.3938, "step": 1058 }, { - "epoch": 21.43, - "learning_rate": 6.49234693877551e-05, - "loss": 0.9541, + "epoch": 11.96, + "learning_rate": 4.957386363636364e-05, + "loss": 0.3897, "step": 1059 }, { - "epoch": 21.45, - "learning_rate": 6.479591836734694e-05, - "loss": 0.9299, + "epoch": 11.98, + "learning_rate": 4.943181818181818e-05, + "loss": 0.3965, "step": 1060 }, { - "epoch": 21.47, - "learning_rate": 6.466836734693877e-05, - "loss": 0.9625, + "epoch": 11.99, + "learning_rate": 4.9289772727272735e-05, + "loss": 0.3999, "step": 1061 }, { - "epoch": 21.49, - "learning_rate": 6.454081632653061e-05, - "loss": 1.0054, + "epoch": 12.0, + "learning_rate": 4.914772727272727e-05, + "loss": 0.3814, "step": 1062 }, { - "epoch": 21.51, - "learning_rate": 6.441326530612244e-05, - "loss": 0.9893, + "epoch": 12.01, + "learning_rate": 4.900568181818182e-05, + "loss": 0.3879, "step": 1063 }, { - "epoch": 21.53, - "learning_rate": 6.428571428571429e-05, - "loss": 0.9906, + "epoch": 12.02, + "learning_rate": 4.886363636363637e-05, + "loss": 0.3768, "step": 1064 }, { - "epoch": 21.55, - "learning_rate": 6.415816326530613e-05, - "loss": 0.9487, + "epoch": 12.03, + "learning_rate": 4.8721590909090915e-05, + "loss": 0.3813, "step": 1065 }, { - "epoch": 21.57, - "learning_rate": 6.403061224489796e-05, - "loss": 0.9728, + "epoch": 12.04, + "learning_rate": 4.857954545454545e-05, + "loss": 0.3759, "step": 1066 }, { - "epoch": 21.59, - "learning_rate": 6.390306122448981e-05, - "loss": 0.9883, + "epoch": 12.05, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.3817, "step": 1067 }, { - "epoch": 21.61, - "learning_rate": 6.377551020408164e-05, - "loss": 1.053, + "epoch": 12.07, + "learning_rate": 4.829545454545455e-05, + "loss": 0.3773, "step": 1068 }, { - "epoch": 21.63, - "learning_rate": 6.364795918367348e-05, - "loss": 1.012, + "epoch": 12.08, + "learning_rate": 4.815340909090909e-05, + "loss": 0.3807, "step": 1069 }, { - "epoch": 21.65, - "learning_rate": 6.352040816326531e-05, - "loss": 0.962, + "epoch": 12.09, + "learning_rate": 4.801136363636364e-05, + "loss": 0.3757, "step": 1070 }, { - "epoch": 21.67, - "learning_rate": 6.339285714285714e-05, - "loss": 0.9955, + "epoch": 12.1, + "learning_rate": 4.7869318181818185e-05, + "loss": 0.3819, "step": 1071 }, { - "epoch": 21.69, - "learning_rate": 6.326530612244899e-05, - "loss": 0.9908, + "epoch": 12.11, + "learning_rate": 4.772727272727273e-05, + "loss": 0.3731, "step": 1072 }, { - "epoch": 21.71, - "learning_rate": 6.313775510204082e-05, - "loss": 1.0327, + "epoch": 12.12, + "learning_rate": 4.7585227272727276e-05, + "loss": 0.3706, "step": 1073 }, { - "epoch": 21.73, - "learning_rate": 6.301020408163265e-05, - "loss": 0.9255, + "epoch": 12.13, + "learning_rate": 4.744318181818182e-05, + "loss": 0.3762, "step": 1074 }, { - "epoch": 21.75, - "learning_rate": 6.28826530612245e-05, - "loss": 0.9268, + "epoch": 12.14, + "learning_rate": 4.7301136363636366e-05, + "loss": 0.3749, "step": 1075 }, { - "epoch": 21.77, - "learning_rate": 6.275510204081633e-05, - "loss": 0.9204, + "epoch": 12.16, + "learning_rate": 4.715909090909091e-05, + "loss": 0.3884, "step": 1076 }, { - "epoch": 21.79, - "learning_rate": 6.262755102040817e-05, - "loss": 0.9838, + "epoch": 12.17, + "learning_rate": 4.7017045454545456e-05, + "loss": 0.373, "step": 1077 }, { - "epoch": 21.81, - "learning_rate": 6.25e-05, - "loss": 0.954, + "epoch": 12.18, + "learning_rate": 4.6875e-05, + "loss": 0.374, "step": 1078 }, { - "epoch": 21.83, - "learning_rate": 6.237244897959183e-05, - "loss": 1.0102, + "epoch": 12.19, + "learning_rate": 4.6732954545454546e-05, + "loss": 0.3813, "step": 1079 }, { - "epoch": 21.85, - "learning_rate": 6.224489795918368e-05, - "loss": 0.916, + "epoch": 12.2, + "learning_rate": 4.659090909090909e-05, + "loss": 0.3745, "step": 1080 }, { - "epoch": 21.87, - "learning_rate": 6.211734693877551e-05, - "loss": 0.9939, + "epoch": 12.21, + "learning_rate": 4.6448863636363636e-05, + "loss": 0.3646, "step": 1081 }, { - "epoch": 21.89, - "learning_rate": 6.198979591836735e-05, - "loss": 0.9675, + "epoch": 12.22, + "learning_rate": 4.630681818181818e-05, + "loss": 0.3729, "step": 1082 }, { - "epoch": 21.91, - "learning_rate": 6.186224489795918e-05, - "loss": 0.9666, + "epoch": 12.24, + "learning_rate": 4.616477272727273e-05, + "loss": 0.3701, "step": 1083 }, { - "epoch": 21.93, - "learning_rate": 6.173469387755101e-05, - "loss": 0.9919, + "epoch": 12.25, + "learning_rate": 4.602272727272727e-05, + "loss": 0.3655, "step": 1084 }, { - "epoch": 21.95, - "learning_rate": 6.160714285714286e-05, - "loss": 1.0106, + "epoch": 12.26, + "learning_rate": 4.5880681818181817e-05, + "loss": 0.3758, "step": 1085 }, { - "epoch": 21.97, - "learning_rate": 6.14795918367347e-05, - "loss": 0.9982, + "epoch": 12.27, + "learning_rate": 4.573863636363637e-05, + "loss": 0.3682, "step": 1086 }, { - "epoch": 21.99, - "learning_rate": 6.135204081632653e-05, - "loss": 1.0137, + "epoch": 12.28, + "learning_rate": 4.5596590909090913e-05, + "loss": 0.3865, "step": 1087 }, { - "epoch": 22.01, - "learning_rate": 6.122448979591838e-05, - "loss": 0.9331, + "epoch": 12.29, + "learning_rate": 4.545454545454546e-05, + "loss": 0.363, "step": 1088 }, { - "epoch": 22.03, - "learning_rate": 6.109693877551021e-05, - "loss": 0.8834, + "epoch": 12.3, + "learning_rate": 4.5312500000000004e-05, + "loss": 0.3727, "step": 1089 }, { - "epoch": 22.06, - "learning_rate": 6.0969387755102046e-05, - "loss": 0.9757, + "epoch": 12.31, + "learning_rate": 4.517045454545455e-05, + "loss": 0.3827, "step": 1090 }, { - "epoch": 22.08, - "learning_rate": 6.084183673469388e-05, - "loss": 0.9038, + "epoch": 12.33, + "learning_rate": 4.5028409090909094e-05, + "loss": 0.3658, "step": 1091 }, { - "epoch": 22.1, - "learning_rate": 6.0714285714285715e-05, - "loss": 0.9097, + "epoch": 12.34, + "learning_rate": 4.488636363636364e-05, + "loss": 0.3844, "step": 1092 }, { - "epoch": 22.12, - "learning_rate": 6.058673469387756e-05, - "loss": 0.8972, + "epoch": 12.35, + "learning_rate": 4.4744318181818184e-05, + "loss": 0.3731, "step": 1093 }, { - "epoch": 22.14, - "learning_rate": 6.045918367346939e-05, - "loss": 0.8825, + "epoch": 12.36, + "learning_rate": 4.460227272727273e-05, + "loss": 0.3767, "step": 1094 }, { - "epoch": 22.16, - "learning_rate": 6.0331632653061234e-05, - "loss": 0.9814, + "epoch": 12.37, + "learning_rate": 4.4460227272727274e-05, + "loss": 0.3751, "step": 1095 }, { - "epoch": 22.18, - "learning_rate": 6.0204081632653065e-05, - "loss": 0.9874, + "epoch": 12.38, + "learning_rate": 4.431818181818182e-05, + "loss": 0.3718, "step": 1096 }, { - "epoch": 22.2, - "learning_rate": 6.0076530612244896e-05, - "loss": 0.912, + "epoch": 12.39, + "learning_rate": 4.4176136363636364e-05, + "loss": 0.3833, "step": 1097 }, { - "epoch": 22.22, - "learning_rate": 5.994897959183674e-05, - "loss": 0.9206, + "epoch": 12.4, + "learning_rate": 4.4034090909090916e-05, + "loss": 0.3673, "step": 1098 }, { - "epoch": 22.24, - "learning_rate": 5.982142857142857e-05, - "loss": 0.9497, + "epoch": 12.42, + "learning_rate": 4.3892045454545454e-05, + "loss": 0.3799, "step": 1099 }, { - "epoch": 22.26, - "learning_rate": 5.9693877551020416e-05, - "loss": 0.9269, + "epoch": 12.43, + "learning_rate": 4.375e-05, + "loss": 0.3661, "step": 1100 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.628705955351122e+18, + "total_flos": 1.5012226913684582e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-1100/training_args.bin +++ b/checkpoint-1100/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-1200/adapter_model.bin b/checkpoint-1200/adapter_model.bin index fe838bfb517b69e550de232754730118ea65bb04..ca33c973443b9a5cdd1321d0b918f881e3c41940 100644 --- a/checkpoint-1200/adapter_model.bin +++ b/checkpoint-1200/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d13f1953081527a4a05ac5762a6e373fae509922a72baa18bfbc0ea2fdd0db77 +oid sha256:65fdc6611558f201789c26daf35ebf132a5b6ddfa2a80501271c771a95626898 size 39409357 diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt index 2a557a97984c4cc74844fb337ee8e2cc0c298b34..1d1ed4505caa26ad2874c065eaafeea062885ba7 100644 --- a/checkpoint-1200/optimizer.pt +++ b/checkpoint-1200/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b4d37b67055d94cd6568b24b76e241acc964153f215407a64e94710d00d1e06 +oid sha256:bebd87283ad07af460aa2916048a35363c5b38020c6a8b96378eae6a00b91f6f size 78844421 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth index fead25ae6e1694df464049cc69133704457634ec..60089dd8a0937e03b89f32d15c42d6158dd4df6e 100644 --- a/checkpoint-1200/rng_state.pth +++ b/checkpoint-1200/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b32bbef891dbdd3524f32551a03a56bb081b6c86879cea3d24ae28b3c8f4bc5f +oid sha256:830e818e6899ff30b431c470cff0e4bafb5e8c7e429469a6fb9dfb5272323c44 size 14575 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt index 6552b7f80e3e5fe7062d2376620abb1ac8c373f3..37c14d25d17272726a6bb3834976522f235cec65 100644 --- a/checkpoint-1200/scheduler.pt +++ b/checkpoint-1200/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13122a9f43d31f9abd38d193f16e53342905df794d9995e7f5483234c3e54c54 +oid sha256:ffe5c887b5a4ca00db6fe980a4033162925ba89d876b21267ad5ea43ba89dcd3 size 627 diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json index e6d8ccf4d007ba938f1e3a0c7b69dfee967403ff..c8ed4c0eb146d2cd0cbfc8eb796bbd79930c275b 100644 --- a/checkpoint-1200/trainer_state.json +++ b/checkpoint-1200/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 24.28074612709453, + "epoch": 13.55692850838482, "eval_steps": 500, "global_step": 1200, "is_hyper_param_search": false, @@ -9,7211 +9,7211 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 }, { - "epoch": 16.21, - "learning_rate": 9.783163265306124e-05, - "loss": 1.19, + "epoch": 9.05, + "learning_rate": 8.62215909090909e-05, + "loss": 0.5247, "step": 801 }, { - "epoch": 16.23, - "learning_rate": 9.770408163265307e-05, - "loss": 1.1258, + "epoch": 9.06, + "learning_rate": 8.607954545454546e-05, + "loss": 0.5403, "step": 802 }, { - "epoch": 16.25, - "learning_rate": 9.75765306122449e-05, - "loss": 1.1765, + "epoch": 9.07, + "learning_rate": 8.593750000000001e-05, + "loss": 0.5252, "step": 803 }, { - "epoch": 16.27, - "learning_rate": 9.744897959183674e-05, - "loss": 1.1217, + "epoch": 9.08, + "learning_rate": 8.579545454545454e-05, + "loss": 0.5296, "step": 804 }, { - "epoch": 16.29, - "learning_rate": 9.732142857142858e-05, - "loss": 1.1293, + "epoch": 9.09, + "learning_rate": 8.56534090909091e-05, + "loss": 0.5223, "step": 805 }, { - "epoch": 16.31, - "learning_rate": 9.719387755102042e-05, - "loss": 1.17, + "epoch": 9.11, + "learning_rate": 8.551136363636364e-05, + "loss": 0.4972, "step": 806 }, { - "epoch": 16.33, - "learning_rate": 9.706632653061225e-05, - "loss": 1.17, + "epoch": 9.12, + "learning_rate": 8.536931818181818e-05, + "loss": 0.5005, "step": 807 }, { - "epoch": 16.35, - "learning_rate": 9.693877551020408e-05, - "loss": 1.2004, + "epoch": 9.13, + "learning_rate": 8.522727272727273e-05, + "loss": 0.5249, "step": 808 }, { - "epoch": 16.37, - "learning_rate": 9.681122448979593e-05, - "loss": 1.1648, + "epoch": 9.14, + "learning_rate": 8.508522727272728e-05, + "loss": 0.5135, "step": 809 }, { - "epoch": 16.39, - "learning_rate": 9.668367346938776e-05, - "loss": 1.0688, + "epoch": 9.15, + "learning_rate": 8.494318181818182e-05, + "loss": 0.5053, "step": 810 }, { - "epoch": 16.41, - "learning_rate": 9.655612244897959e-05, - "loss": 1.1607, + "epoch": 9.16, + "learning_rate": 8.480113636363637e-05, + "loss": 0.5158, "step": 811 }, { - "epoch": 16.43, - "learning_rate": 9.642857142857143e-05, - "loss": 1.1298, + "epoch": 9.17, + "learning_rate": 8.465909090909091e-05, + "loss": 0.5061, "step": 812 }, { - "epoch": 16.45, - "learning_rate": 9.630102040816326e-05, - "loss": 1.1064, + "epoch": 9.18, + "learning_rate": 8.451704545454546e-05, + "loss": 0.4988, "step": 813 }, { - "epoch": 16.47, - "learning_rate": 9.617346938775511e-05, - "loss": 1.1472, + "epoch": 9.2, + "learning_rate": 8.4375e-05, + "loss": 0.5273, "step": 814 }, { - "epoch": 16.49, - "learning_rate": 9.604591836734694e-05, - "loss": 1.1577, + "epoch": 9.21, + "learning_rate": 8.423295454545455e-05, + "loss": 0.5332, "step": 815 }, { - "epoch": 16.51, - "learning_rate": 9.591836734693878e-05, - "loss": 1.1436, + "epoch": 9.22, + "learning_rate": 8.40909090909091e-05, + "loss": 0.5181, "step": 816 }, { - "epoch": 16.53, - "learning_rate": 9.579081632653061e-05, - "loss": 1.1657, + "epoch": 9.23, + "learning_rate": 8.394886363636364e-05, + "loss": 0.5085, "step": 817 }, { - "epoch": 16.55, - "learning_rate": 9.566326530612246e-05, - "loss": 1.1147, + "epoch": 9.24, + "learning_rate": 8.380681818181818e-05, + "loss": 0.5137, "step": 818 }, { - "epoch": 16.57, - "learning_rate": 9.553571428571429e-05, - "loss": 1.1839, + "epoch": 9.25, + "learning_rate": 8.366477272727273e-05, + "loss": 0.5195, "step": 819 }, { - "epoch": 16.59, - "learning_rate": 9.540816326530613e-05, - "loss": 1.1298, + "epoch": 9.26, + "learning_rate": 8.352272727272727e-05, + "loss": 0.5077, "step": 820 }, { - "epoch": 16.61, - "learning_rate": 9.528061224489796e-05, - "loss": 1.2141, + "epoch": 9.28, + "learning_rate": 8.338068181818183e-05, + "loss": 0.5074, "step": 821 }, { - "epoch": 16.63, - "learning_rate": 9.515306122448981e-05, - "loss": 1.2045, + "epoch": 9.29, + "learning_rate": 8.323863636363637e-05, + "loss": 0.5142, "step": 822 }, { - "epoch": 16.65, - "learning_rate": 9.502551020408164e-05, - "loss": 1.1791, + "epoch": 9.3, + "learning_rate": 8.309659090909091e-05, + "loss": 0.5116, "step": 823 }, { - "epoch": 16.67, - "learning_rate": 9.489795918367348e-05, - "loss": 1.1137, + "epoch": 9.31, + "learning_rate": 8.295454545454547e-05, + "loss": 0.4974, "step": 824 }, { - "epoch": 16.69, - "learning_rate": 9.477040816326531e-05, - "loss": 1.1312, + "epoch": 9.32, + "learning_rate": 8.28125e-05, + "loss": 0.5117, "step": 825 }, { - "epoch": 16.71, - "learning_rate": 9.464285714285715e-05, - "loss": 1.1102, + "epoch": 9.33, + "learning_rate": 8.267045454545455e-05, + "loss": 0.5114, "step": 826 }, { - "epoch": 16.73, - "learning_rate": 9.451530612244899e-05, - "loss": 1.1865, + "epoch": 9.34, + "learning_rate": 8.25284090909091e-05, + "loss": 0.5039, "step": 827 }, { - "epoch": 16.75, - "learning_rate": 9.438775510204082e-05, - "loss": 1.1232, + "epoch": 9.35, + "learning_rate": 8.238636363636364e-05, + "loss": 0.498, "step": 828 }, { - "epoch": 16.77, - "learning_rate": 9.426020408163265e-05, - "loss": 1.2068, + "epoch": 9.37, + "learning_rate": 8.224431818181818e-05, + "loss": 0.5042, "step": 829 }, { - "epoch": 16.79, - "learning_rate": 9.41326530612245e-05, - "loss": 1.1864, + "epoch": 9.38, + "learning_rate": 8.210227272727274e-05, + "loss": 0.5049, "step": 830 }, { - "epoch": 16.81, - "learning_rate": 9.400510204081633e-05, - "loss": 1.2195, + "epoch": 9.39, + "learning_rate": 8.196022727272727e-05, + "loss": 0.5123, "step": 831 }, { - "epoch": 16.83, - "learning_rate": 9.387755102040817e-05, - "loss": 1.2063, + "epoch": 9.4, + "learning_rate": 8.181818181818183e-05, + "loss": 0.4907, "step": 832 }, { - "epoch": 16.85, - "learning_rate": 9.375e-05, - "loss": 1.1455, + "epoch": 9.41, + "learning_rate": 8.167613636363637e-05, + "loss": 0.5267, "step": 833 }, { - "epoch": 16.88, - "learning_rate": 9.362244897959183e-05, - "loss": 1.1819, + "epoch": 9.42, + "learning_rate": 8.15340909090909e-05, + "loss": 0.5314, "step": 834 }, { - "epoch": 16.9, - "learning_rate": 9.349489795918368e-05, - "loss": 1.1887, + "epoch": 9.43, + "learning_rate": 8.139204545454546e-05, + "loss": 0.4952, "step": 835 }, { - "epoch": 16.92, - "learning_rate": 9.336734693877551e-05, - "loss": 1.1557, + "epoch": 9.44, + "learning_rate": 8.125000000000001e-05, + "loss": 0.5014, "step": 836 }, { - "epoch": 16.94, - "learning_rate": 9.323979591836735e-05, - "loss": 1.2094, + "epoch": 9.46, + "learning_rate": 8.110795454545454e-05, + "loss": 0.4967, "step": 837 }, { - "epoch": 16.96, - "learning_rate": 9.311224489795918e-05, - "loss": 1.1512, + "epoch": 9.47, + "learning_rate": 8.09659090909091e-05, + "loss": 0.5116, "step": 838 }, { - "epoch": 16.98, - "learning_rate": 9.298469387755103e-05, - "loss": 1.1463, + "epoch": 9.48, + "learning_rate": 8.082386363636365e-05, + "loss": 0.5119, "step": 839 }, { - "epoch": 17.0, - "learning_rate": 9.285714285714286e-05, - "loss": 1.155, + "epoch": 9.49, + "learning_rate": 8.068181818181818e-05, + "loss": 0.4987, "step": 840 }, { - "epoch": 17.02, - "learning_rate": 9.27295918367347e-05, - "loss": 1.1292, + "epoch": 9.5, + "learning_rate": 8.053977272727274e-05, + "loss": 0.5063, "step": 841 }, { - "epoch": 17.04, - "learning_rate": 9.260204081632653e-05, - "loss": 1.0996, + "epoch": 9.51, + "learning_rate": 8.039772727272728e-05, + "loss": 0.5019, "step": 842 }, { - "epoch": 17.06, - "learning_rate": 9.247448979591838e-05, - "loss": 1.0662, + "epoch": 9.52, + "learning_rate": 8.025568181818183e-05, + "loss": 0.5272, "step": 843 }, { - "epoch": 17.08, - "learning_rate": 9.234693877551021e-05, - "loss": 1.0931, + "epoch": 9.54, + "learning_rate": 8.011363636363637e-05, + "loss": 0.4969, "step": 844 }, { - "epoch": 17.1, - "learning_rate": 9.221938775510205e-05, - "loss": 1.0727, + "epoch": 9.55, + "learning_rate": 7.997159090909092e-05, + "loss": 0.5222, "step": 845 }, { - "epoch": 17.12, - "learning_rate": 9.209183673469388e-05, - "loss": 1.1043, + "epoch": 9.56, + "learning_rate": 7.982954545454546e-05, + "loss": 0.4729, "step": 846 }, { - "epoch": 17.14, - "learning_rate": 9.196428571428572e-05, - "loss": 1.0594, + "epoch": 9.57, + "learning_rate": 7.96875e-05, + "loss": 0.4976, "step": 847 }, { - "epoch": 17.16, - "learning_rate": 9.183673469387756e-05, - "loss": 1.0952, + "epoch": 9.58, + "learning_rate": 7.954545454545455e-05, + "loss": 0.4974, "step": 848 }, { - "epoch": 17.18, - "learning_rate": 9.170918367346939e-05, - "loss": 1.0639, + "epoch": 9.59, + "learning_rate": 7.94034090909091e-05, + "loss": 0.4849, "step": 849 }, { - "epoch": 17.2, - "learning_rate": 9.158163265306124e-05, - "loss": 1.132, + "epoch": 9.6, + "learning_rate": 7.926136363636364e-05, + "loss": 0.4897, "step": 850 }, { - "epoch": 17.22, - "learning_rate": 9.145408163265307e-05, - "loss": 1.1083, + "epoch": 9.61, + "learning_rate": 7.911931818181819e-05, + "loss": 0.4962, "step": 851 }, { - "epoch": 17.24, - "learning_rate": 9.13265306122449e-05, - "loss": 1.1282, + "epoch": 9.63, + "learning_rate": 7.897727272727273e-05, + "loss": 0.4877, "step": 852 }, { - "epoch": 17.26, - "learning_rate": 9.119897959183674e-05, - "loss": 1.0474, + "epoch": 9.64, + "learning_rate": 7.883522727272728e-05, + "loss": 0.4921, "step": 853 }, { - "epoch": 17.28, - "learning_rate": 9.107142857142857e-05, - "loss": 1.1138, + "epoch": 9.65, + "learning_rate": 7.869318181818182e-05, + "loss": 0.4969, "step": 854 }, { - "epoch": 17.3, - "learning_rate": 9.094387755102042e-05, - "loss": 1.1025, + "epoch": 9.66, + "learning_rate": 7.855113636363637e-05, + "loss": 0.5045, "step": 855 }, { - "epoch": 17.32, - "learning_rate": 9.081632653061225e-05, - "loss": 1.0968, + "epoch": 9.67, + "learning_rate": 7.840909090909091e-05, + "loss": 0.5207, "step": 856 }, { - "epoch": 17.34, - "learning_rate": 9.068877551020408e-05, - "loss": 1.1683, + "epoch": 9.68, + "learning_rate": 7.826704545454546e-05, + "loss": 0.5098, "step": 857 }, { - "epoch": 17.36, - "learning_rate": 9.056122448979592e-05, - "loss": 1.0975, + "epoch": 9.69, + "learning_rate": 7.8125e-05, + "loss": 0.5005, "step": 858 }, { - "epoch": 17.38, - "learning_rate": 9.043367346938775e-05, - "loss": 1.1274, + "epoch": 9.7, + "learning_rate": 7.798295454545455e-05, + "loss": 0.5028, "step": 859 }, { - "epoch": 17.4, - "learning_rate": 9.030612244897958e-05, - "loss": 1.0916, + "epoch": 9.72, + "learning_rate": 7.784090909090909e-05, + "loss": 0.5067, "step": 860 }, { - "epoch": 17.42, - "learning_rate": 9.017857142857143e-05, - "loss": 1.0912, + "epoch": 9.73, + "learning_rate": 7.769886363636364e-05, + "loss": 0.484, "step": 861 }, { - "epoch": 17.44, - "learning_rate": 9.005102040816327e-05, - "loss": 1.0875, + "epoch": 9.74, + "learning_rate": 7.755681818181818e-05, + "loss": 0.5029, "step": 862 }, { - "epoch": 17.46, - "learning_rate": 8.99234693877551e-05, - "loss": 1.05, + "epoch": 9.75, + "learning_rate": 7.741477272727273e-05, + "loss": 0.5077, "step": 863 }, { - "epoch": 17.48, - "learning_rate": 8.979591836734695e-05, - "loss": 1.1418, + "epoch": 9.76, + "learning_rate": 7.727272727272727e-05, + "loss": 0.5091, "step": 864 }, { - "epoch": 17.5, - "learning_rate": 8.966836734693878e-05, - "loss": 1.0609, + "epoch": 9.77, + "learning_rate": 7.713068181818183e-05, + "loss": 0.4781, "step": 865 }, { - "epoch": 17.52, - "learning_rate": 8.954081632653062e-05, - "loss": 1.1611, + "epoch": 9.78, + "learning_rate": 7.698863636363636e-05, + "loss": 0.5124, "step": 866 }, { - "epoch": 17.54, - "learning_rate": 8.941326530612245e-05, - "loss": 1.1065, + "epoch": 9.79, + "learning_rate": 7.684659090909091e-05, + "loss": 0.4859, "step": 867 }, { - "epoch": 17.56, - "learning_rate": 8.92857142857143e-05, - "loss": 1.1611, + "epoch": 9.81, + "learning_rate": 7.670454545454547e-05, + "loss": 0.4872, "step": 868 }, { - "epoch": 17.58, - "learning_rate": 8.915816326530613e-05, - "loss": 1.1398, + "epoch": 9.82, + "learning_rate": 7.65625e-05, + "loss": 0.4675, "step": 869 }, { - "epoch": 17.6, - "learning_rate": 8.903061224489796e-05, - "loss": 1.1055, + "epoch": 9.83, + "learning_rate": 7.642045454545454e-05, + "loss": 0.5056, "step": 870 }, { - "epoch": 17.62, - "learning_rate": 8.89030612244898e-05, - "loss": 1.1314, + "epoch": 9.84, + "learning_rate": 7.62784090909091e-05, + "loss": 0.4868, "step": 871 }, { - "epoch": 17.64, - "learning_rate": 8.877551020408164e-05, - "loss": 1.1084, + "epoch": 9.85, + "learning_rate": 7.613636363636363e-05, + "loss": 0.4907, "step": 872 }, { - "epoch": 17.66, - "learning_rate": 8.864795918367348e-05, - "loss": 1.1254, + "epoch": 9.86, + "learning_rate": 7.599431818181818e-05, + "loss": 0.474, "step": 873 }, { - "epoch": 17.68, - "learning_rate": 8.852040816326531e-05, - "loss": 1.142, + "epoch": 9.87, + "learning_rate": 7.585227272727274e-05, + "loss": 0.4813, "step": 874 }, { - "epoch": 17.7, - "learning_rate": 8.839285714285714e-05, - "loss": 1.1371, + "epoch": 9.89, + "learning_rate": 7.571022727272727e-05, + "loss": 0.4838, "step": 875 }, { - "epoch": 17.72, - "learning_rate": 8.826530612244899e-05, - "loss": 1.1092, + "epoch": 9.9, + "learning_rate": 7.556818181818183e-05, + "loss": 0.4935, "step": 876 }, { - "epoch": 17.75, - "learning_rate": 8.813775510204082e-05, - "loss": 1.161, + "epoch": 9.91, + "learning_rate": 7.542613636363637e-05, + "loss": 0.4884, "step": 877 }, { - "epoch": 17.77, - "learning_rate": 8.801020408163265e-05, - "loss": 1.1044, + "epoch": 9.92, + "learning_rate": 7.52840909090909e-05, + "loss": 0.4797, "step": 878 }, { - "epoch": 17.79, - "learning_rate": 8.788265306122449e-05, - "loss": 1.117, + "epoch": 9.93, + "learning_rate": 7.514204545454546e-05, + "loss": 0.479, "step": 879 }, { - "epoch": 17.81, - "learning_rate": 8.775510204081632e-05, - "loss": 1.1262, + "epoch": 9.94, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4727, "step": 880 }, { - "epoch": 17.83, - "learning_rate": 8.762755102040817e-05, - "loss": 1.0829, + "epoch": 9.95, + "learning_rate": 7.485795454545454e-05, + "loss": 0.4758, "step": 881 }, { - "epoch": 17.85, - "learning_rate": 8.75e-05, - "loss": 1.1393, + "epoch": 9.96, + "learning_rate": 7.47159090909091e-05, + "loss": 0.482, "step": 882 }, { - "epoch": 17.87, - "learning_rate": 8.737244897959183e-05, - "loss": 1.1781, + "epoch": 9.98, + "learning_rate": 7.457386363636364e-05, + "loss": 0.4951, "step": 883 }, { - "epoch": 17.89, - "learning_rate": 8.724489795918367e-05, - "loss": 1.1582, + "epoch": 9.99, + "learning_rate": 7.443181818181817e-05, + "loss": 0.4823, "step": 884 }, { - "epoch": 17.91, - "learning_rate": 8.711734693877552e-05, - "loss": 1.1469, + "epoch": 10.0, + "learning_rate": 7.428977272727273e-05, + "loss": 0.4638, "step": 885 }, { - "epoch": 17.93, - "learning_rate": 8.698979591836735e-05, - "loss": 1.1494, + "epoch": 10.01, + "learning_rate": 7.414772727272728e-05, + "loss": 0.4715, "step": 886 }, { - "epoch": 17.95, - "learning_rate": 8.68622448979592e-05, - "loss": 1.1251, + "epoch": 10.02, + "learning_rate": 7.400568181818182e-05, + "loss": 0.461, "step": 887 }, { - "epoch": 17.97, - "learning_rate": 8.673469387755102e-05, - "loss": 1.1624, + "epoch": 10.03, + "learning_rate": 7.386363636363637e-05, + "loss": 0.4429, "step": 888 }, { - "epoch": 17.99, - "learning_rate": 8.660714285714287e-05, - "loss": 1.0842, + "epoch": 10.04, + "learning_rate": 7.372159090909091e-05, + "loss": 0.4403, "step": 889 }, { - "epoch": 18.01, - "learning_rate": 8.64795918367347e-05, - "loss": 1.1944, + "epoch": 10.05, + "learning_rate": 7.357954545454546e-05, + "loss": 0.4519, "step": 890 }, { - "epoch": 18.03, - "learning_rate": 8.635204081632653e-05, - "loss": 1.0642, + "epoch": 10.07, + "learning_rate": 7.34375e-05, + "loss": 0.4611, "step": 891 }, { - "epoch": 18.05, - "learning_rate": 8.622448979591838e-05, - "loss": 1.0459, + "epoch": 10.08, + "learning_rate": 7.329545454545455e-05, + "loss": 0.4543, "step": 892 }, { - "epoch": 18.07, - "learning_rate": 8.60969387755102e-05, - "loss": 1.0941, + "epoch": 10.09, + "learning_rate": 7.315340909090909e-05, + "loss": 0.4528, "step": 893 }, { - "epoch": 18.09, - "learning_rate": 8.596938775510205e-05, - "loss": 1.0457, + "epoch": 10.1, + "learning_rate": 7.301136363636364e-05, + "loss": 0.4586, "step": 894 }, { - "epoch": 18.11, - "learning_rate": 8.584183673469388e-05, - "loss": 1.1033, + "epoch": 10.11, + "learning_rate": 7.286931818181818e-05, + "loss": 0.4418, "step": 895 }, { - "epoch": 18.13, - "learning_rate": 8.571428571428571e-05, - "loss": 1.0756, + "epoch": 10.12, + "learning_rate": 7.272727272727273e-05, + "loss": 0.4435, "step": 896 }, { - "epoch": 18.15, - "learning_rate": 8.558673469387756e-05, - "loss": 1.0615, + "epoch": 10.13, + "learning_rate": 7.258522727272727e-05, + "loss": 0.44, "step": 897 }, { - "epoch": 18.17, - "learning_rate": 8.545918367346939e-05, - "loss": 1.0828, + "epoch": 10.15, + "learning_rate": 7.244318181818183e-05, + "loss": 0.4589, "step": 898 }, { - "epoch": 18.19, - "learning_rate": 8.533163265306123e-05, - "loss": 1.1158, + "epoch": 10.16, + "learning_rate": 7.230113636363636e-05, + "loss": 0.4597, "step": 899 }, { - "epoch": 18.21, - "learning_rate": 8.520408163265306e-05, - "loss": 1.0133, + "epoch": 10.17, + "learning_rate": 7.215909090909091e-05, + "loss": 0.4479, "step": 900 }, { - "epoch": 18.23, - "learning_rate": 8.50765306122449e-05, - "loss": 1.0437, + "epoch": 10.18, + "learning_rate": 7.201704545454547e-05, + "loss": 0.4477, "step": 901 }, { - "epoch": 18.25, - "learning_rate": 8.494897959183674e-05, - "loss": 1.0372, + "epoch": 10.19, + "learning_rate": 7.1875e-05, + "loss": 0.446, "step": 902 }, { - "epoch": 18.27, - "learning_rate": 8.482142857142857e-05, - "loss": 1.1012, + "epoch": 10.2, + "learning_rate": 7.173295454545454e-05, + "loss": 0.4546, "step": 903 }, { - "epoch": 18.29, - "learning_rate": 8.469387755102041e-05, - "loss": 1.0777, + "epoch": 10.21, + "learning_rate": 7.15909090909091e-05, + "loss": 0.4347, "step": 904 }, { - "epoch": 18.31, - "learning_rate": 8.456632653061224e-05, - "loss": 1.0799, + "epoch": 10.22, + "learning_rate": 7.144886363636363e-05, + "loss": 0.452, "step": 905 }, { - "epoch": 18.33, - "learning_rate": 8.443877551020409e-05, - "loss": 0.9846, + "epoch": 10.24, + "learning_rate": 7.130681818181818e-05, + "loss": 0.4536, "step": 906 }, { - "epoch": 18.35, - "learning_rate": 8.431122448979592e-05, - "loss": 1.1, + "epoch": 10.25, + "learning_rate": 7.116477272727274e-05, + "loss": 0.4492, "step": 907 }, { - "epoch": 18.37, - "learning_rate": 8.418367346938776e-05, - "loss": 1.0787, + "epoch": 10.26, + "learning_rate": 7.102272727272727e-05, + "loss": 0.4401, "step": 908 }, { - "epoch": 18.39, - "learning_rate": 8.40561224489796e-05, - "loss": 1.0647, + "epoch": 10.27, + "learning_rate": 7.088068181818183e-05, + "loss": 0.4609, "step": 909 }, { - "epoch": 18.41, - "learning_rate": 8.392857142857144e-05, - "loss": 1.056, + "epoch": 10.28, + "learning_rate": 7.073863636363637e-05, + "loss": 0.4544, "step": 910 }, { - "epoch": 18.43, - "learning_rate": 8.380102040816327e-05, - "loss": 1.1131, + "epoch": 10.29, + "learning_rate": 7.05965909090909e-05, + "loss": 0.4477, "step": 911 }, { - "epoch": 18.45, - "learning_rate": 8.367346938775511e-05, - "loss": 1.0825, + "epoch": 10.3, + "learning_rate": 7.045454545454546e-05, + "loss": 0.4445, "step": 912 }, { - "epoch": 18.47, - "learning_rate": 8.354591836734695e-05, - "loss": 1.0681, + "epoch": 10.31, + "learning_rate": 7.031250000000001e-05, + "loss": 0.4544, "step": 913 }, { - "epoch": 18.49, - "learning_rate": 8.341836734693878e-05, - "loss": 1.0479, + "epoch": 10.33, + "learning_rate": 7.017045454545454e-05, + "loss": 0.4634, "step": 914 }, { - "epoch": 18.51, - "learning_rate": 8.329081632653062e-05, - "loss": 1.0921, + "epoch": 10.34, + "learning_rate": 7.00284090909091e-05, + "loss": 0.4499, "step": 915 }, { - "epoch": 18.53, - "learning_rate": 8.316326530612245e-05, - "loss": 1.0626, + "epoch": 10.35, + "learning_rate": 6.988636363636364e-05, + "loss": 0.4354, "step": 916 }, { - "epoch": 18.55, - "learning_rate": 8.30357142857143e-05, - "loss": 1.0518, + "epoch": 10.36, + "learning_rate": 6.974431818181818e-05, + "loss": 0.454, "step": 917 }, { - "epoch": 18.57, - "learning_rate": 8.290816326530613e-05, - "loss": 1.0557, + "epoch": 10.37, + "learning_rate": 6.960227272727273e-05, + "loss": 0.4473, "step": 918 }, { - "epoch": 18.6, - "learning_rate": 8.278061224489796e-05, - "loss": 1.0831, + "epoch": 10.38, + "learning_rate": 6.946022727272728e-05, + "loss": 0.4347, "step": 919 }, { - "epoch": 18.62, - "learning_rate": 8.26530612244898e-05, - "loss": 1.0307, + "epoch": 10.39, + "learning_rate": 6.931818181818182e-05, + "loss": 0.441, "step": 920 }, { - "epoch": 18.64, - "learning_rate": 8.252551020408163e-05, - "loss": 1.0455, + "epoch": 10.4, + "learning_rate": 6.917613636363637e-05, + "loss": 0.4545, "step": 921 }, { - "epoch": 18.66, - "learning_rate": 8.239795918367348e-05, - "loss": 1.0667, + "epoch": 10.42, + "learning_rate": 6.903409090909091e-05, + "loss": 0.458, "step": 922 }, { - "epoch": 18.68, - "learning_rate": 8.227040816326531e-05, - "loss": 1.0736, + "epoch": 10.43, + "learning_rate": 6.889204545454546e-05, + "loss": 0.4381, "step": 923 }, { - "epoch": 18.7, - "learning_rate": 8.214285714285714e-05, - "loss": 1.0108, + "epoch": 10.44, + "learning_rate": 6.875e-05, + "loss": 0.441, "step": 924 }, { - "epoch": 18.72, - "learning_rate": 8.201530612244898e-05, - "loss": 1.0458, + "epoch": 10.45, + "learning_rate": 6.860795454545455e-05, + "loss": 0.4446, "step": 925 }, { - "epoch": 18.74, - "learning_rate": 8.188775510204081e-05, - "loss": 1.0852, + "epoch": 10.46, + "learning_rate": 6.84659090909091e-05, + "loss": 0.4548, "step": 926 }, { - "epoch": 18.76, - "learning_rate": 8.176020408163265e-05, - "loss": 1.1207, + "epoch": 10.47, + "learning_rate": 6.832386363636364e-05, + "loss": 0.4404, "step": 927 }, { - "epoch": 18.78, - "learning_rate": 8.163265306122449e-05, - "loss": 1.0914, + "epoch": 10.48, + "learning_rate": 6.818181818181818e-05, + "loss": 0.4446, "step": 928 }, { - "epoch": 18.8, - "learning_rate": 8.150510204081633e-05, - "loss": 1.1108, + "epoch": 10.5, + "learning_rate": 6.803977272727273e-05, + "loss": 0.4434, "step": 929 }, { - "epoch": 18.82, - "learning_rate": 8.137755102040817e-05, - "loss": 1.1394, + "epoch": 10.51, + "learning_rate": 6.789772727272727e-05, + "loss": 0.4778, "step": 930 }, { - "epoch": 18.84, - "learning_rate": 8.125000000000001e-05, - "loss": 1.029, + "epoch": 10.52, + "learning_rate": 6.775568181818182e-05, + "loss": 0.4356, "step": 931 }, { - "epoch": 18.86, - "learning_rate": 8.112244897959184e-05, - "loss": 1.0661, + "epoch": 10.53, + "learning_rate": 6.761363636363636e-05, + "loss": 0.4464, "step": 932 }, { - "epoch": 18.88, - "learning_rate": 8.099489795918369e-05, - "loss": 1.0303, + "epoch": 10.54, + "learning_rate": 6.747159090909091e-05, + "loss": 0.4387, "step": 933 }, { - "epoch": 18.9, - "learning_rate": 8.086734693877552e-05, - "loss": 1.1144, + "epoch": 10.55, + "learning_rate": 6.732954545454547e-05, + "loss": 0.456, "step": 934 }, { - "epoch": 18.92, - "learning_rate": 8.073979591836736e-05, - "loss": 1.1096, + "epoch": 10.56, + "learning_rate": 6.71875e-05, + "loss": 0.453, "step": 935 }, { - "epoch": 18.94, - "learning_rate": 8.061224489795919e-05, - "loss": 1.123, + "epoch": 10.57, + "learning_rate": 6.704545454545455e-05, + "loss": 0.4611, "step": 936 }, { - "epoch": 18.96, - "learning_rate": 8.048469387755102e-05, - "loss": 1.1002, + "epoch": 10.59, + "learning_rate": 6.69034090909091e-05, + "loss": 0.4354, "step": 937 }, { - "epoch": 18.98, - "learning_rate": 8.035714285714287e-05, - "loss": 1.1016, + "epoch": 10.6, + "learning_rate": 6.676136363636364e-05, + "loss": 0.4519, "step": 938 }, { - "epoch": 19.0, - "learning_rate": 8.02295918367347e-05, - "loss": 1.0847, + "epoch": 10.61, + "learning_rate": 6.661931818181818e-05, + "loss": 0.4435, "step": 939 }, { - "epoch": 19.02, - "learning_rate": 8.010204081632653e-05, - "loss": 1.1029, + "epoch": 10.62, + "learning_rate": 6.647727272727274e-05, + "loss": 0.4422, "step": 940 }, { - "epoch": 19.04, - "learning_rate": 7.997448979591837e-05, - "loss": 1.041, + "epoch": 10.63, + "learning_rate": 6.633522727272727e-05, + "loss": 0.4344, "step": 941 }, { - "epoch": 19.06, - "learning_rate": 7.98469387755102e-05, - "loss": 1.01, + "epoch": 10.64, + "learning_rate": 6.619318181818183e-05, + "loss": 0.4419, "step": 942 }, { - "epoch": 19.08, - "learning_rate": 7.971938775510205e-05, - "loss": 1.0197, + "epoch": 10.65, + "learning_rate": 6.605113636363637e-05, + "loss": 0.4308, "step": 943 }, { - "epoch": 19.1, - "learning_rate": 7.959183673469388e-05, - "loss": 1.0543, + "epoch": 10.66, + "learning_rate": 6.59090909090909e-05, + "loss": 0.4043, "step": 944 }, { - "epoch": 19.12, - "learning_rate": 7.946428571428571e-05, - "loss": 1.0369, + "epoch": 10.68, + "learning_rate": 6.576704545454546e-05, + "loss": 0.4626, "step": 945 }, { - "epoch": 19.14, - "learning_rate": 7.933673469387755e-05, - "loss": 1.0154, + "epoch": 10.69, + "learning_rate": 6.562500000000001e-05, + "loss": 0.4365, "step": 946 }, { - "epoch": 19.16, - "learning_rate": 7.920918367346939e-05, - "loss": 0.9546, + "epoch": 10.7, + "learning_rate": 6.548295454545454e-05, + "loss": 0.4397, "step": 947 }, { - "epoch": 19.18, - "learning_rate": 7.908163265306123e-05, - "loss": 0.9982, + "epoch": 10.71, + "learning_rate": 6.53409090909091e-05, + "loss": 0.4463, "step": 948 }, { - "epoch": 19.2, - "learning_rate": 7.895408163265306e-05, - "loss": 1.0748, + "epoch": 10.72, + "learning_rate": 6.519886363636364e-05, + "loss": 0.4394, "step": 949 }, { - "epoch": 19.22, - "learning_rate": 7.882653061224489e-05, - "loss": 1.0562, + "epoch": 10.73, + "learning_rate": 6.505681818181818e-05, + "loss": 0.45, "step": 950 }, { - "epoch": 19.24, - "learning_rate": 7.869897959183674e-05, - "loss": 1.0352, + "epoch": 10.74, + "learning_rate": 6.491477272727273e-05, + "loss": 0.4363, "step": 951 }, { - "epoch": 19.26, - "learning_rate": 7.857142857142858e-05, - "loss": 0.9976, + "epoch": 10.76, + "learning_rate": 6.477272727272728e-05, + "loss": 0.4566, "step": 952 }, { - "epoch": 19.28, - "learning_rate": 7.844387755102041e-05, - "loss": 1.0221, + "epoch": 10.77, + "learning_rate": 6.463068181818183e-05, + "loss": 0.4235, "step": 953 }, { - "epoch": 19.3, - "learning_rate": 7.831632653061226e-05, - "loss": 1.0119, + "epoch": 10.78, + "learning_rate": 6.448863636363637e-05, + "loss": 0.4458, "step": 954 }, { - "epoch": 19.32, - "learning_rate": 7.818877551020409e-05, - "loss": 1.0657, + "epoch": 10.79, + "learning_rate": 6.434659090909092e-05, + "loss": 0.423, "step": 955 }, { - "epoch": 19.34, - "learning_rate": 7.806122448979593e-05, - "loss": 0.9591, + "epoch": 10.8, + "learning_rate": 6.420454545454546e-05, + "loss": 0.445, "step": 956 }, { - "epoch": 19.36, - "learning_rate": 7.793367346938776e-05, - "loss": 1.0101, + "epoch": 10.81, + "learning_rate": 6.40625e-05, + "loss": 0.424, "step": 957 }, { - "epoch": 19.38, - "learning_rate": 7.780612244897959e-05, - "loss": 1.0453, + "epoch": 10.82, + "learning_rate": 6.392045454545455e-05, + "loss": 0.4224, "step": 958 }, { - "epoch": 19.4, - "learning_rate": 7.767857142857144e-05, - "loss": 1.0461, + "epoch": 10.83, + "learning_rate": 6.37784090909091e-05, + "loss": 0.4223, "step": 959 }, { - "epoch": 19.42, - "learning_rate": 7.755102040816327e-05, - "loss": 1.0959, + "epoch": 10.85, + "learning_rate": 6.363636363636364e-05, + "loss": 0.4314, "step": 960 }, { - "epoch": 19.44, - "learning_rate": 7.742346938775511e-05, - "loss": 1.0608, + "epoch": 10.86, + "learning_rate": 6.349431818181819e-05, + "loss": 0.4488, "step": 961 }, { - "epoch": 19.47, - "learning_rate": 7.729591836734694e-05, - "loss": 1.1177, + "epoch": 10.87, + "learning_rate": 6.335227272727273e-05, + "loss": 0.423, "step": 962 }, { - "epoch": 19.49, - "learning_rate": 7.716836734693877e-05, - "loss": 1.0354, + "epoch": 10.88, + "learning_rate": 6.321022727272728e-05, + "loss": 0.4416, "step": 963 }, { - "epoch": 19.51, - "learning_rate": 7.704081632653062e-05, - "loss": 1.0507, + "epoch": 10.89, + "learning_rate": 6.306818181818182e-05, + "loss": 0.423, "step": 964 }, { - "epoch": 19.53, - "learning_rate": 7.691326530612245e-05, - "loss": 1.0313, + "epoch": 10.9, + "learning_rate": 6.292613636363637e-05, + "loss": 0.4502, "step": 965 }, { - "epoch": 19.55, - "learning_rate": 7.67857142857143e-05, - "loss": 1.0569, + "epoch": 10.91, + "learning_rate": 6.278409090909091e-05, + "loss": 0.4266, "step": 966 }, { - "epoch": 19.57, - "learning_rate": 7.665816326530612e-05, - "loss": 1.0862, + "epoch": 10.92, + "learning_rate": 6.264204545454546e-05, + "loss": 0.4344, "step": 967 }, { - "epoch": 19.59, - "learning_rate": 7.653061224489796e-05, - "loss": 1.0593, + "epoch": 10.94, + "learning_rate": 6.25e-05, + "loss": 0.434, "step": 968 }, { - "epoch": 19.61, - "learning_rate": 7.64030612244898e-05, - "loss": 1.0602, + "epoch": 10.95, + "learning_rate": 6.235795454545455e-05, + "loss": 0.4269, "step": 969 }, { - "epoch": 19.63, - "learning_rate": 7.627551020408163e-05, - "loss": 1.0048, + "epoch": 10.96, + "learning_rate": 6.221590909090909e-05, + "loss": 0.4158, "step": 970 }, { - "epoch": 19.65, - "learning_rate": 7.614795918367347e-05, - "loss": 1.0346, + "epoch": 10.97, + "learning_rate": 6.207386363636364e-05, + "loss": 0.4231, "step": 971 }, { - "epoch": 19.67, - "learning_rate": 7.60204081632653e-05, - "loss": 1.0172, + "epoch": 10.98, + "learning_rate": 6.193181818181818e-05, + "loss": 0.4235, "step": 972 }, { - "epoch": 19.69, - "learning_rate": 7.589285714285714e-05, - "loss": 1.02, + "epoch": 10.99, + "learning_rate": 6.178977272727273e-05, + "loss": 0.4504, "step": 973 }, { - "epoch": 19.71, - "learning_rate": 7.576530612244898e-05, - "loss": 1.0028, + "epoch": 11.0, + "learning_rate": 6.164772727272727e-05, + "loss": 0.4394, "step": 974 }, { - "epoch": 19.73, - "learning_rate": 7.563775510204083e-05, - "loss": 1.08, + "epoch": 11.02, + "learning_rate": 6.150568181818183e-05, + "loss": 0.4333, "step": 975 }, { - "epoch": 19.75, - "learning_rate": 7.551020408163266e-05, - "loss": 1.0402, + "epoch": 11.03, + "learning_rate": 6.136363636363636e-05, + "loss": 0.3936, "step": 976 }, { - "epoch": 19.77, - "learning_rate": 7.53826530612245e-05, - "loss": 1.0567, + "epoch": 11.04, + "learning_rate": 6.122159090909091e-05, + "loss": 0.3933, "step": 977 }, { - "epoch": 19.79, - "learning_rate": 7.525510204081633e-05, - "loss": 1.0169, + "epoch": 11.05, + "learning_rate": 6.107954545454547e-05, + "loss": 0.4161, "step": 978 }, { - "epoch": 19.81, - "learning_rate": 7.512755102040818e-05, - "loss": 0.9881, + "epoch": 11.06, + "learning_rate": 6.0937500000000004e-05, + "loss": 0.4097, "step": 979 }, { - "epoch": 19.83, - "learning_rate": 7.500000000000001e-05, - "loss": 1.0677, + "epoch": 11.07, + "learning_rate": 6.079545454545454e-05, + "loss": 0.412, "step": 980 }, { - "epoch": 19.85, - "learning_rate": 7.487244897959184e-05, - "loss": 1.1026, + "epoch": 11.08, + "learning_rate": 6.0653409090909094e-05, + "loss": 0.4104, "step": 981 }, { - "epoch": 19.87, - "learning_rate": 7.474489795918368e-05, - "loss": 1.0101, + "epoch": 11.09, + "learning_rate": 6.051136363636364e-05, + "loss": 0.4152, "step": 982 }, { - "epoch": 19.89, - "learning_rate": 7.461734693877551e-05, - "loss": 1.069, + "epoch": 11.11, + "learning_rate": 6.036931818181818e-05, + "loss": 0.4037, "step": 983 }, { - "epoch": 19.91, - "learning_rate": 7.448979591836736e-05, - "loss": 1.0493, + "epoch": 11.12, + "learning_rate": 6.022727272727273e-05, + "loss": 0.413, "step": 984 }, { - "epoch": 19.93, - "learning_rate": 7.436224489795919e-05, - "loss": 1.0858, + "epoch": 11.13, + "learning_rate": 6.0085227272727274e-05, + "loss": 0.4413, "step": 985 }, { - "epoch": 19.95, - "learning_rate": 7.423469387755102e-05, - "loss": 1.0734, + "epoch": 11.14, + "learning_rate": 5.9943181818181826e-05, + "loss": 0.3908, "step": 986 }, { - "epoch": 19.97, - "learning_rate": 7.410714285714286e-05, - "loss": 1.0203, + "epoch": 11.15, + "learning_rate": 5.9801136363636365e-05, + "loss": 0.3982, "step": 987 }, { - "epoch": 19.99, - "learning_rate": 7.39795918367347e-05, - "loss": 1.0285, + "epoch": 11.16, + "learning_rate": 5.965909090909091e-05, + "loss": 0.4109, "step": 988 }, { - "epoch": 20.01, - "learning_rate": 7.385204081632653e-05, - "loss": 0.9446, + "epoch": 11.17, + "learning_rate": 5.951704545454546e-05, + "loss": 0.3923, "step": 989 }, { - "epoch": 20.03, - "learning_rate": 7.372448979591837e-05, - "loss": 0.9915, + "epoch": 11.18, + "learning_rate": 5.9375e-05, + "loss": 0.4107, "step": 990 }, { - "epoch": 20.05, - "learning_rate": 7.35969387755102e-05, - "loss": 0.9882, + "epoch": 11.2, + "learning_rate": 5.9232954545454545e-05, + "loss": 0.4099, "step": 991 }, { - "epoch": 20.07, - "learning_rate": 7.346938775510205e-05, - "loss": 0.9338, + "epoch": 11.21, + "learning_rate": 5.90909090909091e-05, + "loss": 0.4163, "step": 992 }, { - "epoch": 20.09, - "learning_rate": 7.334183673469388e-05, - "loss": 0.942, + "epoch": 11.22, + "learning_rate": 5.8948863636363635e-05, + "loss": 0.4189, "step": 993 }, { - "epoch": 20.11, - "learning_rate": 7.321428571428571e-05, - "loss": 0.9725, + "epoch": 11.23, + "learning_rate": 5.880681818181818e-05, + "loss": 0.3889, "step": 994 }, { - "epoch": 20.13, - "learning_rate": 7.308673469387755e-05, - "loss": 1.027, + "epoch": 11.24, + "learning_rate": 5.866477272727273e-05, + "loss": 0.3988, "step": 995 }, { - "epoch": 20.15, - "learning_rate": 7.29591836734694e-05, - "loss": 1.0081, + "epoch": 11.25, + "learning_rate": 5.852272727272727e-05, + "loss": 0.4215, "step": 996 }, { - "epoch": 20.17, - "learning_rate": 7.283163265306123e-05, - "loss": 1.0117, + "epoch": 11.26, + "learning_rate": 5.838068181818183e-05, + "loss": 0.4207, "step": 997 }, { - "epoch": 20.19, - "learning_rate": 7.270408163265307e-05, - "loss": 0.969, + "epoch": 11.27, + "learning_rate": 5.823863636363637e-05, + "loss": 0.413, "step": 998 }, { - "epoch": 20.21, - "learning_rate": 7.25765306122449e-05, - "loss": 1.0024, + "epoch": 11.29, + "learning_rate": 5.8096590909090906e-05, + "loss": 0.4057, "step": 999 }, { - "epoch": 20.23, - "learning_rate": 7.244897959183675e-05, - "loss": 0.994, + "epoch": 11.3, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.3939, "step": 1000 }, { - "epoch": 20.25, - "learning_rate": 7.232142857142858e-05, - "loss": 1.0248, + "epoch": 11.31, + "learning_rate": 5.78125e-05, + "loss": 0.4199, "step": 1001 }, { - "epoch": 20.27, - "learning_rate": 7.219387755102042e-05, - "loss": 1.0493, + "epoch": 11.32, + "learning_rate": 5.767045454545454e-05, + "loss": 0.4076, "step": 1002 }, { - "epoch": 20.29, - "learning_rate": 7.206632653061225e-05, - "loss": 1.0011, + "epoch": 11.33, + "learning_rate": 5.75284090909091e-05, + "loss": 0.4079, "step": 1003 }, { - "epoch": 20.31, - "learning_rate": 7.193877551020408e-05, - "loss": 0.9874, + "epoch": 11.34, + "learning_rate": 5.738636363636364e-05, + "loss": 0.4002, "step": 1004 }, { - "epoch": 20.34, - "learning_rate": 7.181122448979593e-05, - "loss": 1.0049, + "epoch": 11.35, + "learning_rate": 5.724431818181818e-05, + "loss": 0.3801, "step": 1005 }, { - "epoch": 20.36, - "learning_rate": 7.168367346938776e-05, - "loss": 1.0314, + "epoch": 11.37, + "learning_rate": 5.7102272727272735e-05, + "loss": 0.3939, "step": 1006 }, { - "epoch": 20.38, - "learning_rate": 7.155612244897959e-05, - "loss": 0.9742, + "epoch": 11.38, + "learning_rate": 5.696022727272727e-05, + "loss": 0.3904, "step": 1007 }, { - "epoch": 20.4, - "learning_rate": 7.142857142857143e-05, - "loss": 1.0621, + "epoch": 11.39, + "learning_rate": 5.6818181818181825e-05, + "loss": 0.406, "step": 1008 }, { - "epoch": 20.42, - "learning_rate": 7.130102040816326e-05, - "loss": 0.9672, + "epoch": 11.4, + "learning_rate": 5.667613636363637e-05, + "loss": 0.4185, "step": 1009 }, { - "epoch": 20.44, - "learning_rate": 7.117346938775511e-05, - "loss": 1.0018, + "epoch": 11.41, + "learning_rate": 5.653409090909091e-05, + "loss": 0.3976, "step": 1010 }, { - "epoch": 20.46, - "learning_rate": 7.104591836734694e-05, - "loss": 1.0045, + "epoch": 11.42, + "learning_rate": 5.639204545454546e-05, + "loss": 0.3907, "step": 1011 }, { - "epoch": 20.48, - "learning_rate": 7.091836734693877e-05, - "loss": 0.9675, + "epoch": 11.43, + "learning_rate": 5.6250000000000005e-05, + "loss": 0.4065, "step": 1012 }, { - "epoch": 20.5, - "learning_rate": 7.079081632653062e-05, - "loss": 0.976, + "epoch": 11.44, + "learning_rate": 5.6107954545454544e-05, + "loss": 0.4069, "step": 1013 }, { - "epoch": 20.52, - "learning_rate": 7.066326530612245e-05, - "loss": 1.0523, + "epoch": 11.46, + "learning_rate": 5.5965909090909095e-05, + "loss": 0.3964, "step": 1014 }, { - "epoch": 20.54, - "learning_rate": 7.053571428571429e-05, - "loss": 1.052, + "epoch": 11.47, + "learning_rate": 5.582386363636364e-05, + "loss": 0.3912, "step": 1015 }, { - "epoch": 20.56, - "learning_rate": 7.040816326530612e-05, - "loss": 0.9903, + "epoch": 11.48, + "learning_rate": 5.568181818181818e-05, + "loss": 0.3944, "step": 1016 }, { - "epoch": 20.58, - "learning_rate": 7.028061224489795e-05, - "loss": 1.0337, + "epoch": 11.49, + "learning_rate": 5.553977272727273e-05, + "loss": 0.4197, "step": 1017 }, { - "epoch": 20.6, - "learning_rate": 7.01530612244898e-05, - "loss": 1.1122, + "epoch": 11.5, + "learning_rate": 5.5397727272727276e-05, + "loss": 0.4064, "step": 1018 }, { - "epoch": 20.62, - "learning_rate": 7.002551020408164e-05, - "loss": 1.0133, + "epoch": 11.51, + "learning_rate": 5.525568181818183e-05, + "loss": 0.4054, "step": 1019 }, { - "epoch": 20.64, - "learning_rate": 6.989795918367347e-05, - "loss": 0.9588, + "epoch": 11.52, + "learning_rate": 5.5113636363636366e-05, + "loss": 0.4128, "step": 1020 }, { - "epoch": 20.66, - "learning_rate": 6.977040816326532e-05, - "loss": 0.9892, + "epoch": 11.53, + "learning_rate": 5.497159090909091e-05, + "loss": 0.3976, "step": 1021 }, { - "epoch": 20.68, - "learning_rate": 6.964285714285715e-05, - "loss": 1.025, + "epoch": 11.55, + "learning_rate": 5.482954545454546e-05, + "loss": 0.3863, "step": 1022 }, { - "epoch": 20.7, - "learning_rate": 6.951530612244899e-05, - "loss": 1.0196, + "epoch": 11.56, + "learning_rate": 5.46875e-05, + "loss": 0.3994, "step": 1023 }, { - "epoch": 20.72, - "learning_rate": 6.938775510204082e-05, - "loss": 1.0146, + "epoch": 11.57, + "learning_rate": 5.4545454545454546e-05, + "loss": 0.401, "step": 1024 }, { - "epoch": 20.74, - "learning_rate": 6.926020408163265e-05, - "loss": 1.0656, + "epoch": 11.58, + "learning_rate": 5.44034090909091e-05, + "loss": 0.3948, "step": 1025 }, { - "epoch": 20.76, - "learning_rate": 6.91326530612245e-05, - "loss": 0.9584, + "epoch": 11.59, + "learning_rate": 5.4261363636363636e-05, + "loss": 0.3967, "step": 1026 }, { - "epoch": 20.78, - "learning_rate": 6.900510204081633e-05, - "loss": 0.9877, + "epoch": 11.6, + "learning_rate": 5.411931818181818e-05, + "loss": 0.413, "step": 1027 }, { - "epoch": 20.8, - "learning_rate": 6.887755102040817e-05, - "loss": 1.0607, + "epoch": 11.61, + "learning_rate": 5.397727272727273e-05, + "loss": 0.4032, "step": 1028 }, { - "epoch": 20.82, - "learning_rate": 6.875e-05, - "loss": 0.9969, + "epoch": 11.63, + "learning_rate": 5.383522727272727e-05, + "loss": 0.3905, "step": 1029 }, { - "epoch": 20.84, - "learning_rate": 6.862244897959184e-05, - "loss": 0.9506, + "epoch": 11.64, + "learning_rate": 5.3693181818181823e-05, + "loss": 0.4041, "step": 1030 }, { - "epoch": 20.86, - "learning_rate": 6.849489795918368e-05, - "loss": 1.0576, + "epoch": 11.65, + "learning_rate": 5.355113636363637e-05, + "loss": 0.392, "step": 1031 }, { - "epoch": 20.88, - "learning_rate": 6.836734693877551e-05, - "loss": 1.0094, + "epoch": 11.66, + "learning_rate": 5.340909090909091e-05, + "loss": 0.3942, "step": 1032 }, { - "epoch": 20.9, - "learning_rate": 6.823979591836735e-05, - "loss": 0.9872, + "epoch": 11.67, + "learning_rate": 5.326704545454546e-05, + "loss": 0.3946, "step": 1033 }, { - "epoch": 20.92, - "learning_rate": 6.811224489795919e-05, - "loss": 1.0544, + "epoch": 11.68, + "learning_rate": 5.3125000000000004e-05, + "loss": 0.3989, "step": 1034 }, { - "epoch": 20.94, - "learning_rate": 6.798469387755102e-05, - "loss": 1.0194, + "epoch": 11.69, + "learning_rate": 5.298295454545454e-05, + "loss": 0.4101, "step": 1035 }, { - "epoch": 20.96, - "learning_rate": 6.785714285714286e-05, - "loss": 1.0009, + "epoch": 11.7, + "learning_rate": 5.2840909090909094e-05, + "loss": 0.4033, "step": 1036 }, { - "epoch": 20.98, - "learning_rate": 6.772959183673469e-05, - "loss": 0.9727, + "epoch": 11.72, + "learning_rate": 5.269886363636364e-05, + "loss": 0.3937, "step": 1037 }, { - "epoch": 21.0, - "learning_rate": 6.760204081632652e-05, - "loss": 0.9754, + "epoch": 11.73, + "learning_rate": 5.255681818181818e-05, + "loss": 0.3873, "step": 1038 }, { - "epoch": 21.02, - "learning_rate": 6.747448979591837e-05, - "loss": 0.9953, + "epoch": 11.74, + "learning_rate": 5.241477272727273e-05, + "loss": 0.3922, "step": 1039 }, { - "epoch": 21.04, - "learning_rate": 6.73469387755102e-05, - "loss": 0.9307, + "epoch": 11.75, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.4016, "step": 1040 }, { - "epoch": 21.06, - "learning_rate": 6.721938775510204e-05, - "loss": 0.9151, + "epoch": 11.76, + "learning_rate": 5.2130681818181826e-05, + "loss": 0.3892, "step": 1041 }, { - "epoch": 21.08, - "learning_rate": 6.709183673469389e-05, - "loss": 0.9474, + "epoch": 11.77, + "learning_rate": 5.1988636363636364e-05, + "loss": 0.3974, "step": 1042 }, { - "epoch": 21.1, - "learning_rate": 6.696428571428572e-05, - "loss": 0.9697, + "epoch": 11.78, + "learning_rate": 5.184659090909091e-05, + "loss": 0.4024, "step": 1043 }, { - "epoch": 21.12, - "learning_rate": 6.683673469387756e-05, - "loss": 0.9423, + "epoch": 11.79, + "learning_rate": 5.170454545454546e-05, + "loss": 0.3889, "step": 1044 }, { - "epoch": 21.14, - "learning_rate": 6.670918367346939e-05, - "loss": 0.9797, + "epoch": 11.81, + "learning_rate": 5.15625e-05, + "loss": 0.4097, "step": 1045 }, { - "epoch": 21.16, - "learning_rate": 6.658163265306124e-05, - "loss": 0.919, + "epoch": 11.82, + "learning_rate": 5.1420454545454545e-05, + "loss": 0.4047, "step": 1046 }, { - "epoch": 21.18, - "learning_rate": 6.645408163265307e-05, - "loss": 0.9743, + "epoch": 11.83, + "learning_rate": 5.12784090909091e-05, + "loss": 0.4033, "step": 1047 }, { - "epoch": 21.21, - "learning_rate": 6.63265306122449e-05, - "loss": 0.9575, + "epoch": 11.84, + "learning_rate": 5.1136363636363635e-05, + "loss": 0.3774, "step": 1048 }, { - "epoch": 21.23, - "learning_rate": 6.619897959183674e-05, - "loss": 0.9861, + "epoch": 11.85, + "learning_rate": 5.099431818181818e-05, + "loss": 0.405, "step": 1049 }, { - "epoch": 21.25, - "learning_rate": 6.607142857142857e-05, - "loss": 0.9103, + "epoch": 11.86, + "learning_rate": 5.085227272727273e-05, + "loss": 0.3996, "step": 1050 }, { - "epoch": 21.27, - "learning_rate": 6.594387755102042e-05, - "loss": 0.993, + "epoch": 11.87, + "learning_rate": 5.071022727272727e-05, + "loss": 0.3885, "step": 1051 }, { - "epoch": 21.29, - "learning_rate": 6.581632653061225e-05, - "loss": 0.9668, + "epoch": 11.88, + "learning_rate": 5.056818181818183e-05, + "loss": 0.3914, "step": 1052 }, { - "epoch": 21.31, - "learning_rate": 6.568877551020408e-05, - "loss": 1.0008, + "epoch": 11.9, + "learning_rate": 5.042613636363637e-05, + "loss": 0.3908, "step": 1053 }, { - "epoch": 21.33, - "learning_rate": 6.556122448979592e-05, - "loss": 0.9825, + "epoch": 11.91, + "learning_rate": 5.0284090909090905e-05, + "loss": 0.3921, "step": 1054 }, { - "epoch": 21.35, - "learning_rate": 6.543367346938776e-05, - "loss": 1.0174, + "epoch": 11.92, + "learning_rate": 5.0142045454545464e-05, + "loss": 0.4077, "step": 1055 }, { - "epoch": 21.37, - "learning_rate": 6.530612244897959e-05, - "loss": 0.9685, + "epoch": 11.93, + "learning_rate": 5e-05, + "loss": 0.3973, "step": 1056 }, { - "epoch": 21.39, - "learning_rate": 6.517857142857143e-05, - "loss": 0.9265, + "epoch": 11.94, + "learning_rate": 4.985795454545455e-05, + "loss": 0.3986, "step": 1057 }, { - "epoch": 21.41, - "learning_rate": 6.505102040816326e-05, - "loss": 0.9495, + "epoch": 11.95, + "learning_rate": 4.971590909090909e-05, + "loss": 0.3938, "step": 1058 }, { - "epoch": 21.43, - "learning_rate": 6.49234693877551e-05, - "loss": 0.9541, + "epoch": 11.96, + "learning_rate": 4.957386363636364e-05, + "loss": 0.3897, "step": 1059 }, { - "epoch": 21.45, - "learning_rate": 6.479591836734694e-05, - "loss": 0.9299, + "epoch": 11.98, + "learning_rate": 4.943181818181818e-05, + "loss": 0.3965, "step": 1060 }, { - "epoch": 21.47, - "learning_rate": 6.466836734693877e-05, - "loss": 0.9625, + "epoch": 11.99, + "learning_rate": 4.9289772727272735e-05, + "loss": 0.3999, "step": 1061 }, { - "epoch": 21.49, - "learning_rate": 6.454081632653061e-05, - "loss": 1.0054, + "epoch": 12.0, + "learning_rate": 4.914772727272727e-05, + "loss": 0.3814, "step": 1062 }, { - "epoch": 21.51, - "learning_rate": 6.441326530612244e-05, - "loss": 0.9893, + "epoch": 12.01, + "learning_rate": 4.900568181818182e-05, + "loss": 0.3879, "step": 1063 }, { - "epoch": 21.53, - "learning_rate": 6.428571428571429e-05, - "loss": 0.9906, + "epoch": 12.02, + "learning_rate": 4.886363636363637e-05, + "loss": 0.3768, "step": 1064 }, { - "epoch": 21.55, - "learning_rate": 6.415816326530613e-05, - "loss": 0.9487, + "epoch": 12.03, + "learning_rate": 4.8721590909090915e-05, + "loss": 0.3813, "step": 1065 }, { - "epoch": 21.57, - "learning_rate": 6.403061224489796e-05, - "loss": 0.9728, + "epoch": 12.04, + "learning_rate": 4.857954545454545e-05, + "loss": 0.3759, "step": 1066 }, { - "epoch": 21.59, - "learning_rate": 6.390306122448981e-05, - "loss": 0.9883, + "epoch": 12.05, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.3817, "step": 1067 }, { - "epoch": 21.61, - "learning_rate": 6.377551020408164e-05, - "loss": 1.053, + "epoch": 12.07, + "learning_rate": 4.829545454545455e-05, + "loss": 0.3773, "step": 1068 }, { - "epoch": 21.63, - "learning_rate": 6.364795918367348e-05, - "loss": 1.012, + "epoch": 12.08, + "learning_rate": 4.815340909090909e-05, + "loss": 0.3807, "step": 1069 }, { - "epoch": 21.65, - "learning_rate": 6.352040816326531e-05, - "loss": 0.962, + "epoch": 12.09, + "learning_rate": 4.801136363636364e-05, + "loss": 0.3757, "step": 1070 }, { - "epoch": 21.67, - "learning_rate": 6.339285714285714e-05, - "loss": 0.9955, + "epoch": 12.1, + "learning_rate": 4.7869318181818185e-05, + "loss": 0.3819, "step": 1071 }, { - "epoch": 21.69, - "learning_rate": 6.326530612244899e-05, - "loss": 0.9908, + "epoch": 12.11, + "learning_rate": 4.772727272727273e-05, + "loss": 0.3731, "step": 1072 }, { - "epoch": 21.71, - "learning_rate": 6.313775510204082e-05, - "loss": 1.0327, + "epoch": 12.12, + "learning_rate": 4.7585227272727276e-05, + "loss": 0.3706, "step": 1073 }, { - "epoch": 21.73, - "learning_rate": 6.301020408163265e-05, - "loss": 0.9255, + "epoch": 12.13, + "learning_rate": 4.744318181818182e-05, + "loss": 0.3762, "step": 1074 }, { - "epoch": 21.75, - "learning_rate": 6.28826530612245e-05, - "loss": 0.9268, + "epoch": 12.14, + "learning_rate": 4.7301136363636366e-05, + "loss": 0.3749, "step": 1075 }, { - "epoch": 21.77, - "learning_rate": 6.275510204081633e-05, - "loss": 0.9204, + "epoch": 12.16, + "learning_rate": 4.715909090909091e-05, + "loss": 0.3884, "step": 1076 }, { - "epoch": 21.79, - "learning_rate": 6.262755102040817e-05, - "loss": 0.9838, + "epoch": 12.17, + "learning_rate": 4.7017045454545456e-05, + "loss": 0.373, "step": 1077 }, { - "epoch": 21.81, - "learning_rate": 6.25e-05, - "loss": 0.954, + "epoch": 12.18, + "learning_rate": 4.6875e-05, + "loss": 0.374, "step": 1078 }, { - "epoch": 21.83, - "learning_rate": 6.237244897959183e-05, - "loss": 1.0102, + "epoch": 12.19, + "learning_rate": 4.6732954545454546e-05, + "loss": 0.3813, "step": 1079 }, { - "epoch": 21.85, - "learning_rate": 6.224489795918368e-05, - "loss": 0.916, + "epoch": 12.2, + "learning_rate": 4.659090909090909e-05, + "loss": 0.3745, "step": 1080 }, { - "epoch": 21.87, - "learning_rate": 6.211734693877551e-05, - "loss": 0.9939, + "epoch": 12.21, + "learning_rate": 4.6448863636363636e-05, + "loss": 0.3646, "step": 1081 }, { - "epoch": 21.89, - "learning_rate": 6.198979591836735e-05, - "loss": 0.9675, + "epoch": 12.22, + "learning_rate": 4.630681818181818e-05, + "loss": 0.3729, "step": 1082 }, { - "epoch": 21.91, - "learning_rate": 6.186224489795918e-05, - "loss": 0.9666, + "epoch": 12.24, + "learning_rate": 4.616477272727273e-05, + "loss": 0.3701, "step": 1083 }, { - "epoch": 21.93, - "learning_rate": 6.173469387755101e-05, - "loss": 0.9919, + "epoch": 12.25, + "learning_rate": 4.602272727272727e-05, + "loss": 0.3655, "step": 1084 }, { - "epoch": 21.95, - "learning_rate": 6.160714285714286e-05, - "loss": 1.0106, + "epoch": 12.26, + "learning_rate": 4.5880681818181817e-05, + "loss": 0.3758, "step": 1085 }, { - "epoch": 21.97, - "learning_rate": 6.14795918367347e-05, - "loss": 0.9982, + "epoch": 12.27, + "learning_rate": 4.573863636363637e-05, + "loss": 0.3682, "step": 1086 }, { - "epoch": 21.99, - "learning_rate": 6.135204081632653e-05, - "loss": 1.0137, + "epoch": 12.28, + "learning_rate": 4.5596590909090913e-05, + "loss": 0.3865, "step": 1087 }, { - "epoch": 22.01, - "learning_rate": 6.122448979591838e-05, - "loss": 0.9331, + "epoch": 12.29, + "learning_rate": 4.545454545454546e-05, + "loss": 0.363, "step": 1088 }, { - "epoch": 22.03, - "learning_rate": 6.109693877551021e-05, - "loss": 0.8834, + "epoch": 12.3, + "learning_rate": 4.5312500000000004e-05, + "loss": 0.3727, "step": 1089 }, { - "epoch": 22.06, - "learning_rate": 6.0969387755102046e-05, - "loss": 0.9757, + "epoch": 12.31, + "learning_rate": 4.517045454545455e-05, + "loss": 0.3827, "step": 1090 }, { - "epoch": 22.08, - "learning_rate": 6.084183673469388e-05, - "loss": 0.9038, + "epoch": 12.33, + "learning_rate": 4.5028409090909094e-05, + "loss": 0.3658, "step": 1091 }, { - "epoch": 22.1, - "learning_rate": 6.0714285714285715e-05, - "loss": 0.9097, + "epoch": 12.34, + "learning_rate": 4.488636363636364e-05, + "loss": 0.3844, "step": 1092 }, { - "epoch": 22.12, - "learning_rate": 6.058673469387756e-05, - "loss": 0.8972, + "epoch": 12.35, + "learning_rate": 4.4744318181818184e-05, + "loss": 0.3731, "step": 1093 }, { - "epoch": 22.14, - "learning_rate": 6.045918367346939e-05, - "loss": 0.8825, + "epoch": 12.36, + "learning_rate": 4.460227272727273e-05, + "loss": 0.3767, "step": 1094 }, { - "epoch": 22.16, - "learning_rate": 6.0331632653061234e-05, - "loss": 0.9814, + "epoch": 12.37, + "learning_rate": 4.4460227272727274e-05, + "loss": 0.3751, "step": 1095 }, { - "epoch": 22.18, - "learning_rate": 6.0204081632653065e-05, - "loss": 0.9874, + "epoch": 12.38, + "learning_rate": 4.431818181818182e-05, + "loss": 0.3718, "step": 1096 }, { - "epoch": 22.2, - "learning_rate": 6.0076530612244896e-05, - "loss": 0.912, + "epoch": 12.39, + "learning_rate": 4.4176136363636364e-05, + "loss": 0.3833, "step": 1097 }, { - "epoch": 22.22, - "learning_rate": 5.994897959183674e-05, - "loss": 0.9206, + "epoch": 12.4, + "learning_rate": 4.4034090909090916e-05, + "loss": 0.3673, "step": 1098 }, { - "epoch": 22.24, - "learning_rate": 5.982142857142857e-05, - "loss": 0.9497, + "epoch": 12.42, + "learning_rate": 4.3892045454545454e-05, + "loss": 0.3799, "step": 1099 }, { - "epoch": 22.26, - "learning_rate": 5.9693877551020416e-05, - "loss": 0.9269, + "epoch": 12.43, + "learning_rate": 4.375e-05, + "loss": 0.3661, "step": 1100 }, { - "epoch": 22.28, - "learning_rate": 5.956632653061225e-05, - "loss": 0.9452, + "epoch": 12.44, + "learning_rate": 4.360795454545455e-05, + "loss": 0.3554, "step": 1101 }, { - "epoch": 22.3, - "learning_rate": 5.9438775510204084e-05, - "loss": 0.9548, + "epoch": 12.45, + "learning_rate": 4.346590909090909e-05, + "loss": 0.3787, "step": 1102 }, { - "epoch": 22.32, - "learning_rate": 5.931122448979592e-05, - "loss": 0.9689, + "epoch": 12.46, + "learning_rate": 4.3323863636363635e-05, + "loss": 0.3695, "step": 1103 }, { - "epoch": 22.34, - "learning_rate": 5.918367346938776e-05, - "loss": 0.9455, + "epoch": 12.47, + "learning_rate": 4.318181818181819e-05, + "loss": 0.3777, "step": 1104 }, { - "epoch": 22.36, - "learning_rate": 5.905612244897959e-05, - "loss": 0.9409, + "epoch": 12.48, + "learning_rate": 4.303977272727273e-05, + "loss": 0.3693, "step": 1105 }, { - "epoch": 22.38, - "learning_rate": 5.8928571428571435e-05, - "loss": 0.9093, + "epoch": 12.49, + "learning_rate": 4.289772727272727e-05, + "loss": 0.3731, "step": 1106 }, { - "epoch": 22.4, - "learning_rate": 5.8801020408163266e-05, - "loss": 0.921, + "epoch": 12.51, + "learning_rate": 4.275568181818182e-05, + "loss": 0.3659, "step": 1107 }, { - "epoch": 22.42, - "learning_rate": 5.867346938775511e-05, - "loss": 0.9368, + "epoch": 12.52, + "learning_rate": 4.261363636363637e-05, + "loss": 0.3689, "step": 1108 }, { - "epoch": 22.44, - "learning_rate": 5.854591836734694e-05, - "loss": 0.907, + "epoch": 12.53, + "learning_rate": 4.247159090909091e-05, + "loss": 0.3625, "step": 1109 }, { - "epoch": 22.46, - "learning_rate": 5.841836734693877e-05, - "loss": 0.9126, + "epoch": 12.54, + "learning_rate": 4.232954545454546e-05, + "loss": 0.3874, "step": 1110 }, { - "epoch": 22.48, - "learning_rate": 5.8290816326530616e-05, - "loss": 0.9161, + "epoch": 12.55, + "learning_rate": 4.21875e-05, + "loss": 0.3651, "step": 1111 }, { - "epoch": 22.5, - "learning_rate": 5.816326530612245e-05, - "loss": 0.9542, + "epoch": 12.56, + "learning_rate": 4.204545454545455e-05, + "loss": 0.3639, "step": 1112 }, { - "epoch": 22.52, - "learning_rate": 5.803571428571429e-05, - "loss": 0.9775, + "epoch": 12.57, + "learning_rate": 4.190340909090909e-05, + "loss": 0.378, "step": 1113 }, { - "epoch": 22.54, - "learning_rate": 5.790816326530612e-05, - "loss": 1.0006, + "epoch": 12.59, + "learning_rate": 4.176136363636364e-05, + "loss": 0.3726, "step": 1114 }, { - "epoch": 22.56, - "learning_rate": 5.778061224489796e-05, - "loss": 0.8965, + "epoch": 12.6, + "learning_rate": 4.161931818181818e-05, + "loss": 0.3732, "step": 1115 }, { - "epoch": 22.58, - "learning_rate": 5.7653061224489805e-05, - "loss": 0.944, + "epoch": 12.61, + "learning_rate": 4.1477272727272734e-05, + "loss": 0.3673, "step": 1116 }, { - "epoch": 22.6, - "learning_rate": 5.7525510204081636e-05, - "loss": 0.9162, + "epoch": 12.62, + "learning_rate": 4.133522727272727e-05, + "loss": 0.3566, "step": 1117 }, { - "epoch": 22.62, - "learning_rate": 5.739795918367348e-05, - "loss": 0.9325, + "epoch": 12.63, + "learning_rate": 4.119318181818182e-05, + "loss": 0.3757, "step": 1118 }, { - "epoch": 22.64, - "learning_rate": 5.727040816326531e-05, - "loss": 0.8998, + "epoch": 12.64, + "learning_rate": 4.105113636363637e-05, + "loss": 0.3739, "step": 1119 }, { - "epoch": 22.66, - "learning_rate": 5.714285714285714e-05, - "loss": 0.9362, + "epoch": 12.65, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.3768, "step": 1120 }, { - "epoch": 22.68, - "learning_rate": 5.7015306122448986e-05, - "loss": 0.9969, + "epoch": 12.66, + "learning_rate": 4.076704545454545e-05, + "loss": 0.3758, "step": 1121 }, { - "epoch": 22.7, - "learning_rate": 5.688775510204082e-05, - "loss": 0.9104, + "epoch": 12.68, + "learning_rate": 4.0625000000000005e-05, + "loss": 0.3655, "step": 1122 }, { - "epoch": 22.72, - "learning_rate": 5.676020408163265e-05, - "loss": 0.9746, + "epoch": 12.69, + "learning_rate": 4.048295454545455e-05, + "loss": 0.3673, "step": 1123 }, { - "epoch": 22.74, - "learning_rate": 5.663265306122449e-05, - "loss": 0.9821, + "epoch": 12.7, + "learning_rate": 4.034090909090909e-05, + "loss": 0.3683, "step": 1124 }, { - "epoch": 22.76, - "learning_rate": 5.650510204081633e-05, - "loss": 0.9526, + "epoch": 12.71, + "learning_rate": 4.019886363636364e-05, + "loss": 0.3569, "step": 1125 }, { - "epoch": 22.78, - "learning_rate": 5.637755102040817e-05, - "loss": 0.871, + "epoch": 12.72, + "learning_rate": 4.0056818181818185e-05, + "loss": 0.3741, "step": 1126 }, { - "epoch": 22.8, - "learning_rate": 5.6250000000000005e-05, - "loss": 0.9534, + "epoch": 12.73, + "learning_rate": 3.991477272727273e-05, + "loss": 0.3817, "step": 1127 }, { - "epoch": 22.82, - "learning_rate": 5.6122448979591836e-05, - "loss": 0.9616, + "epoch": 12.74, + "learning_rate": 3.9772727272727275e-05, + "loss": 0.3748, "step": 1128 }, { - "epoch": 22.84, - "learning_rate": 5.599489795918368e-05, - "loss": 0.9627, + "epoch": 12.75, + "learning_rate": 3.963068181818182e-05, + "loss": 0.3625, "step": 1129 }, { - "epoch": 22.86, - "learning_rate": 5.586734693877551e-05, - "loss": 0.9704, + "epoch": 12.77, + "learning_rate": 3.9488636363636366e-05, + "loss": 0.3656, "step": 1130 }, { - "epoch": 22.88, - "learning_rate": 5.5739795918367356e-05, - "loss": 0.9506, + "epoch": 12.78, + "learning_rate": 3.934659090909091e-05, + "loss": 0.3564, "step": 1131 }, { - "epoch": 22.9, - "learning_rate": 5.561224489795919e-05, - "loss": 0.9553, + "epoch": 12.79, + "learning_rate": 3.9204545454545456e-05, + "loss": 0.3737, "step": 1132 }, { - "epoch": 22.93, - "learning_rate": 5.548469387755102e-05, - "loss": 0.9294, + "epoch": 12.8, + "learning_rate": 3.90625e-05, + "loss": 0.3649, "step": 1133 }, { - "epoch": 22.95, - "learning_rate": 5.535714285714286e-05, - "loss": 0.8979, + "epoch": 12.81, + "learning_rate": 3.8920454545454546e-05, + "loss": 0.3728, "step": 1134 }, { - "epoch": 22.97, - "learning_rate": 5.522959183673469e-05, - "loss": 1.0004, + "epoch": 12.82, + "learning_rate": 3.877840909090909e-05, + "loss": 0.3865, "step": 1135 }, { - "epoch": 22.99, - "learning_rate": 5.510204081632653e-05, - "loss": 0.9821, + "epoch": 12.83, + "learning_rate": 3.8636363636363636e-05, + "loss": 0.3866, "step": 1136 }, { - "epoch": 23.01, - "learning_rate": 5.497448979591837e-05, - "loss": 0.9607, + "epoch": 12.85, + "learning_rate": 3.849431818181818e-05, + "loss": 0.3725, "step": 1137 }, { - "epoch": 23.03, - "learning_rate": 5.4846938775510206e-05, - "loss": 0.9757, + "epoch": 12.86, + "learning_rate": 3.835227272727273e-05, + "loss": 0.3662, "step": 1138 }, { - "epoch": 23.05, - "learning_rate": 5.471938775510205e-05, - "loss": 0.9096, + "epoch": 12.87, + "learning_rate": 3.821022727272727e-05, + "loss": 0.3742, "step": 1139 }, { - "epoch": 23.07, - "learning_rate": 5.459183673469388e-05, - "loss": 0.9144, + "epoch": 12.88, + "learning_rate": 3.8068181818181816e-05, + "loss": 0.3727, "step": 1140 }, { - "epoch": 23.09, - "learning_rate": 5.446428571428571e-05, - "loss": 0.8667, + "epoch": 12.89, + "learning_rate": 3.792613636363637e-05, + "loss": 0.3653, "step": 1141 }, { - "epoch": 23.11, - "learning_rate": 5.4336734693877556e-05, - "loss": 0.8993, + "epoch": 12.9, + "learning_rate": 3.778409090909091e-05, + "loss": 0.3631, "step": 1142 }, { - "epoch": 23.13, - "learning_rate": 5.420918367346939e-05, - "loss": 0.8964, + "epoch": 12.91, + "learning_rate": 3.764204545454545e-05, + "loss": 0.3674, "step": 1143 }, { - "epoch": 23.15, - "learning_rate": 5.408163265306123e-05, - "loss": 0.9173, + "epoch": 12.92, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.3598, "step": 1144 }, { - "epoch": 23.17, - "learning_rate": 5.395408163265306e-05, - "loss": 0.9019, + "epoch": 12.94, + "learning_rate": 3.735795454545455e-05, + "loss": 0.3697, "step": 1145 }, { - "epoch": 23.19, - "learning_rate": 5.382653061224489e-05, - "loss": 0.9303, + "epoch": 12.95, + "learning_rate": 3.721590909090909e-05, + "loss": 0.3639, "step": 1146 }, { - "epoch": 23.21, - "learning_rate": 5.369897959183674e-05, - "loss": 0.9268, + "epoch": 12.96, + "learning_rate": 3.707386363636364e-05, + "loss": 0.3597, "step": 1147 }, { - "epoch": 23.23, - "learning_rate": 5.3571428571428575e-05, - "loss": 0.8803, + "epoch": 12.97, + "learning_rate": 3.6931818181818184e-05, + "loss": 0.3815, "step": 1148 }, { - "epoch": 23.25, - "learning_rate": 5.344387755102041e-05, - "loss": 0.9197, + "epoch": 12.98, + "learning_rate": 3.678977272727273e-05, + "loss": 0.3477, "step": 1149 }, { - "epoch": 23.27, - "learning_rate": 5.331632653061225e-05, - "loss": 0.9204, + "epoch": 12.99, + "learning_rate": 3.6647727272727274e-05, + "loss": 0.3631, "step": 1150 }, { - "epoch": 23.29, - "learning_rate": 5.318877551020408e-05, - "loss": 0.8802, + "epoch": 13.0, + "learning_rate": 3.650568181818182e-05, + "loss": 0.3569, "step": 1151 }, { - "epoch": 23.31, - "learning_rate": 5.3061224489795926e-05, - "loss": 0.9044, + "epoch": 13.01, + "learning_rate": 3.6363636363636364e-05, + "loss": 0.3435, "step": 1152 }, { - "epoch": 23.33, - "learning_rate": 5.293367346938776e-05, - "loss": 0.8893, + "epoch": 13.03, + "learning_rate": 3.6221590909090916e-05, + "loss": 0.3504, "step": 1153 }, { - "epoch": 23.35, - "learning_rate": 5.280612244897959e-05, - "loss": 0.8928, + "epoch": 13.04, + "learning_rate": 3.6079545454545454e-05, + "loss": 0.3582, "step": 1154 }, { - "epoch": 23.37, - "learning_rate": 5.267857142857143e-05, - "loss": 0.9353, + "epoch": 13.05, + "learning_rate": 3.59375e-05, + "loss": 0.356, "step": 1155 }, { - "epoch": 23.39, - "learning_rate": 5.255102040816326e-05, - "loss": 0.9345, + "epoch": 13.06, + "learning_rate": 3.579545454545455e-05, + "loss": 0.3506, "step": 1156 }, { - "epoch": 23.41, - "learning_rate": 5.242346938775511e-05, - "loss": 0.9372, + "epoch": 13.07, + "learning_rate": 3.565340909090909e-05, + "loss": 0.3628, "step": 1157 }, { - "epoch": 23.43, - "learning_rate": 5.229591836734694e-05, - "loss": 0.9234, + "epoch": 13.08, + "learning_rate": 3.5511363636363635e-05, + "loss": 0.3494, "step": 1158 }, { - "epoch": 23.45, - "learning_rate": 5.2168367346938776e-05, - "loss": 0.9177, + "epoch": 13.09, + "learning_rate": 3.5369318181818186e-05, + "loss": 0.3653, "step": 1159 }, { - "epoch": 23.47, - "learning_rate": 5.2040816326530614e-05, - "loss": 0.8757, + "epoch": 13.11, + "learning_rate": 3.522727272727273e-05, + "loss": 0.3515, "step": 1160 }, { - "epoch": 23.49, - "learning_rate": 5.191326530612245e-05, - "loss": 0.9048, + "epoch": 13.12, + "learning_rate": 3.508522727272727e-05, + "loss": 0.3474, "step": 1161 }, { - "epoch": 23.51, - "learning_rate": 5.1785714285714296e-05, - "loss": 0.9248, + "epoch": 13.13, + "learning_rate": 3.494318181818182e-05, + "loss": 0.3469, "step": 1162 }, { - "epoch": 23.53, - "learning_rate": 5.1658163265306127e-05, - "loss": 0.9379, + "epoch": 13.14, + "learning_rate": 3.480113636363637e-05, + "loss": 0.3471, "step": 1163 }, { - "epoch": 23.55, - "learning_rate": 5.153061224489796e-05, - "loss": 0.8596, + "epoch": 13.15, + "learning_rate": 3.465909090909091e-05, + "loss": 0.355, "step": 1164 }, { - "epoch": 23.57, - "learning_rate": 5.14030612244898e-05, - "loss": 0.9751, + "epoch": 13.16, + "learning_rate": 3.451704545454546e-05, + "loss": 0.3532, "step": 1165 }, { - "epoch": 23.59, - "learning_rate": 5.127551020408163e-05, - "loss": 0.8842, + "epoch": 13.17, + "learning_rate": 3.4375e-05, + "loss": 0.3533, "step": 1166 }, { - "epoch": 23.61, - "learning_rate": 5.114795918367348e-05, - "loss": 0.8765, + "epoch": 13.18, + "learning_rate": 3.423295454545455e-05, + "loss": 0.3571, "step": 1167 }, { - "epoch": 23.63, - "learning_rate": 5.102040816326531e-05, - "loss": 0.8942, + "epoch": 13.2, + "learning_rate": 3.409090909090909e-05, + "loss": 0.3435, "step": 1168 }, { - "epoch": 23.65, - "learning_rate": 5.089285714285714e-05, - "loss": 0.938, + "epoch": 13.21, + "learning_rate": 3.394886363636364e-05, + "loss": 0.348, "step": 1169 }, { - "epoch": 23.67, - "learning_rate": 5.076530612244898e-05, - "loss": 0.8993, + "epoch": 13.22, + "learning_rate": 3.380681818181818e-05, + "loss": 0.3505, "step": 1170 }, { - "epoch": 23.69, - "learning_rate": 5.063775510204082e-05, - "loss": 0.9362, + "epoch": 13.23, + "learning_rate": 3.3664772727272734e-05, + "loss": 0.346, "step": 1171 }, { - "epoch": 23.71, - "learning_rate": 5.051020408163265e-05, - "loss": 0.9249, + "epoch": 13.24, + "learning_rate": 3.352272727272727e-05, + "loss": 0.3568, "step": 1172 }, { - "epoch": 23.73, - "learning_rate": 5.0382653061224496e-05, - "loss": 0.9055, + "epoch": 13.25, + "learning_rate": 3.338068181818182e-05, + "loss": 0.3548, "step": 1173 }, { - "epoch": 23.75, - "learning_rate": 5.025510204081633e-05, - "loss": 0.8967, + "epoch": 13.26, + "learning_rate": 3.323863636363637e-05, + "loss": 0.352, "step": 1174 }, { - "epoch": 23.77, - "learning_rate": 5.012755102040817e-05, - "loss": 0.8795, + "epoch": 13.27, + "learning_rate": 3.3096590909090915e-05, + "loss": 0.3561, "step": 1175 }, { - "epoch": 23.8, - "learning_rate": 5e-05, - "loss": 0.9452, + "epoch": 13.29, + "learning_rate": 3.295454545454545e-05, + "loss": 0.3424, "step": 1176 }, { - "epoch": 23.82, - "learning_rate": 4.987244897959184e-05, - "loss": 0.926, + "epoch": 13.3, + "learning_rate": 3.2812500000000005e-05, + "loss": 0.3453, "step": 1177 }, { - "epoch": 23.84, - "learning_rate": 4.974489795918368e-05, - "loss": 0.8948, + "epoch": 13.31, + "learning_rate": 3.267045454545455e-05, + "loss": 0.347, "step": 1178 }, { - "epoch": 23.86, - "learning_rate": 4.961734693877551e-05, - "loss": 0.8926, + "epoch": 13.32, + "learning_rate": 3.252840909090909e-05, + "loss": 0.3526, "step": 1179 }, { - "epoch": 23.88, - "learning_rate": 4.9489795918367346e-05, - "loss": 0.8949, + "epoch": 13.33, + "learning_rate": 3.238636363636364e-05, + "loss": 0.3634, "step": 1180 }, { - "epoch": 23.9, - "learning_rate": 4.9362244897959184e-05, - "loss": 0.9648, + "epoch": 13.34, + "learning_rate": 3.2244318181818185e-05, + "loss": 0.3591, "step": 1181 }, { - "epoch": 23.92, - "learning_rate": 4.923469387755102e-05, - "loss": 0.9599, + "epoch": 13.35, + "learning_rate": 3.210227272727273e-05, + "loss": 0.3449, "step": 1182 }, { - "epoch": 23.94, - "learning_rate": 4.910714285714286e-05, - "loss": 0.9603, + "epoch": 13.36, + "learning_rate": 3.1960227272727275e-05, + "loss": 0.3362, "step": 1183 }, { - "epoch": 23.96, - "learning_rate": 4.89795918367347e-05, - "loss": 0.9302, + "epoch": 13.38, + "learning_rate": 3.181818181818182e-05, + "loss": 0.3613, "step": 1184 }, { - "epoch": 23.98, - "learning_rate": 4.8852040816326534e-05, - "loss": 0.9261, + "epoch": 13.39, + "learning_rate": 3.1676136363636365e-05, + "loss": 0.3509, "step": 1185 }, { - "epoch": 24.0, - "learning_rate": 4.872448979591837e-05, - "loss": 0.9257, + "epoch": 13.4, + "learning_rate": 3.153409090909091e-05, + "loss": 0.3534, "step": 1186 }, { - "epoch": 24.02, - "learning_rate": 4.859693877551021e-05, - "loss": 0.8725, + "epoch": 13.41, + "learning_rate": 3.1392045454545456e-05, + "loss": 0.3452, "step": 1187 }, { - "epoch": 24.04, - "learning_rate": 4.846938775510204e-05, - "loss": 0.8486, + "epoch": 13.42, + "learning_rate": 3.125e-05, + "loss": 0.3659, "step": 1188 }, { - "epoch": 24.06, - "learning_rate": 4.834183673469388e-05, - "loss": 0.8457, + "epoch": 13.43, + "learning_rate": 3.1107954545454546e-05, + "loss": 0.3445, "step": 1189 }, { - "epoch": 24.08, - "learning_rate": 4.8214285714285716e-05, - "loss": 0.7848, + "epoch": 13.44, + "learning_rate": 3.096590909090909e-05, + "loss": 0.3536, "step": 1190 }, { - "epoch": 24.1, - "learning_rate": 4.8086734693877554e-05, - "loss": 0.8885, + "epoch": 13.46, + "learning_rate": 3.0823863636363636e-05, + "loss": 0.3588, "step": 1191 }, { - "epoch": 24.12, - "learning_rate": 4.795918367346939e-05, - "loss": 0.9099, + "epoch": 13.47, + "learning_rate": 3.068181818181818e-05, + "loss": 0.347, "step": 1192 }, { - "epoch": 24.14, - "learning_rate": 4.783163265306123e-05, - "loss": 0.9147, + "epoch": 13.48, + "learning_rate": 3.053977272727273e-05, + "loss": 0.3537, "step": 1193 }, { - "epoch": 24.16, - "learning_rate": 4.7704081632653066e-05, - "loss": 0.8781, + "epoch": 13.49, + "learning_rate": 3.039772727272727e-05, + "loss": 0.3516, "step": 1194 }, { - "epoch": 24.18, - "learning_rate": 4.7576530612244904e-05, - "loss": 0.8847, + "epoch": 13.5, + "learning_rate": 3.025568181818182e-05, + "loss": 0.3587, "step": 1195 }, { - "epoch": 24.2, - "learning_rate": 4.744897959183674e-05, - "loss": 0.9041, + "epoch": 13.51, + "learning_rate": 3.0113636363636365e-05, + "loss": 0.3494, "step": 1196 }, { - "epoch": 24.22, - "learning_rate": 4.732142857142857e-05, - "loss": 0.8639, + "epoch": 13.52, + "learning_rate": 2.9971590909090913e-05, + "loss": 0.3341, "step": 1197 }, { - "epoch": 24.24, - "learning_rate": 4.719387755102041e-05, - "loss": 0.8831, + "epoch": 13.53, + "learning_rate": 2.9829545454545455e-05, + "loss": 0.3563, "step": 1198 }, { - "epoch": 24.26, - "learning_rate": 4.706632653061225e-05, - "loss": 0.9063, + "epoch": 13.55, + "learning_rate": 2.96875e-05, + "loss": 0.3534, "step": 1199 }, { - "epoch": 24.28, - "learning_rate": 4.6938775510204086e-05, - "loss": 0.8753, + "epoch": 13.56, + "learning_rate": 2.954545454545455e-05, + "loss": 0.3632, "step": 1200 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.7762072708554138e+18, + "total_flos": 1.6376151934208102e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-1200/training_args.bin +++ b/checkpoint-1200/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-1300/adapter_model.bin b/checkpoint-1300/adapter_model.bin index 2a46567e8a873934c32b5f6716c212c0583d11c5..4add2ddf3a60dfcddc63dc962185fc8c6748fc9c 100644 --- a/checkpoint-1300/adapter_model.bin +++ b/checkpoint-1300/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:557e20c98ce81a1b2174b31161d704a79e1057d7cba9a8a3a044698f748e1a96 +oid sha256:fd38544a3f483f339db731e48dedd495f34e33524ab8ebb4dcaf56ed451e74e2 size 39409357 diff --git a/checkpoint-1300/optimizer.pt b/checkpoint-1300/optimizer.pt index dc193c111e311e6c8b784c0d8a731044dca46428..51ce5db0fc1ad7ec09ad7d2d197ad5efee8c0a7e 100644 --- a/checkpoint-1300/optimizer.pt +++ b/checkpoint-1300/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2311ed46f1164ff3772d9c69e8f7347964538333edd4438abef82e7683876dc +oid sha256:3050127d2eb3d707b91667223062e0f617e707678ffe8035de6051275306728f size 78844421 diff --git a/checkpoint-1300/rng_state.pth b/checkpoint-1300/rng_state.pth index c16fb5959f3252446d7952ca7b642c0f6b79609e..c1c4c79fcb2148e5de373db02c3ee2987200a3a9 100644 --- a/checkpoint-1300/rng_state.pth +++ b/checkpoint-1300/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2233b71d28d82b8331de6224e1abb884bd9108ce746c680fd1c4d641c935668 +oid sha256:3e46ce4eb16240da9f3a8b3066acb6f59a234249ee2a3052f3323786da479838 size 14575 diff --git a/checkpoint-1300/scheduler.pt b/checkpoint-1300/scheduler.pt index 6d80e9e6b9dff077ceec581106fad9de9f7ae96c..9e88dcb049f0353ddd8d1df6c49c0ea83286fa4e 100644 --- a/checkpoint-1300/scheduler.pt +++ b/checkpoint-1300/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af8fb4309008de879e60055efbf110ed70c676d2270a50eb4a6b5b40910d4878 +oid sha256:d75bc5140c934078e850991a6d79c9d5e5d870823a1e52dc1b01bf1b267613ec size 627 diff --git a/checkpoint-1300/trainer_state.json b/checkpoint-1300/trainer_state.json index b03ed0161efaed231027c7690ba0e05c13ddb02c..e7fd57393f6115dc085faf1acc1b5c47dc2fe54a 100644 --- a/checkpoint-1300/trainer_state.json +++ b/checkpoint-1300/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 26.304141637685742, + "epoch": 14.68667255075022, "eval_steps": 500, "global_step": 1300, "is_hyper_param_search": false, @@ -9,7811 +9,7811 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 }, { - "epoch": 16.21, - "learning_rate": 9.783163265306124e-05, - "loss": 1.19, + "epoch": 9.05, + "learning_rate": 8.62215909090909e-05, + "loss": 0.5247, "step": 801 }, { - "epoch": 16.23, - "learning_rate": 9.770408163265307e-05, - "loss": 1.1258, + "epoch": 9.06, + "learning_rate": 8.607954545454546e-05, + "loss": 0.5403, "step": 802 }, { - "epoch": 16.25, - "learning_rate": 9.75765306122449e-05, - "loss": 1.1765, + "epoch": 9.07, + "learning_rate": 8.593750000000001e-05, + "loss": 0.5252, "step": 803 }, { - "epoch": 16.27, - "learning_rate": 9.744897959183674e-05, - "loss": 1.1217, + "epoch": 9.08, + "learning_rate": 8.579545454545454e-05, + "loss": 0.5296, "step": 804 }, { - "epoch": 16.29, - "learning_rate": 9.732142857142858e-05, - "loss": 1.1293, + "epoch": 9.09, + "learning_rate": 8.56534090909091e-05, + "loss": 0.5223, "step": 805 }, { - "epoch": 16.31, - "learning_rate": 9.719387755102042e-05, - "loss": 1.17, + "epoch": 9.11, + "learning_rate": 8.551136363636364e-05, + "loss": 0.4972, "step": 806 }, { - "epoch": 16.33, - "learning_rate": 9.706632653061225e-05, - "loss": 1.17, + "epoch": 9.12, + "learning_rate": 8.536931818181818e-05, + "loss": 0.5005, "step": 807 }, { - "epoch": 16.35, - "learning_rate": 9.693877551020408e-05, - "loss": 1.2004, + "epoch": 9.13, + "learning_rate": 8.522727272727273e-05, + "loss": 0.5249, "step": 808 }, { - "epoch": 16.37, - "learning_rate": 9.681122448979593e-05, - "loss": 1.1648, + "epoch": 9.14, + "learning_rate": 8.508522727272728e-05, + "loss": 0.5135, "step": 809 }, { - "epoch": 16.39, - "learning_rate": 9.668367346938776e-05, - "loss": 1.0688, + "epoch": 9.15, + "learning_rate": 8.494318181818182e-05, + "loss": 0.5053, "step": 810 }, { - "epoch": 16.41, - "learning_rate": 9.655612244897959e-05, - "loss": 1.1607, + "epoch": 9.16, + "learning_rate": 8.480113636363637e-05, + "loss": 0.5158, "step": 811 }, { - "epoch": 16.43, - "learning_rate": 9.642857142857143e-05, - "loss": 1.1298, + "epoch": 9.17, + "learning_rate": 8.465909090909091e-05, + "loss": 0.5061, "step": 812 }, { - "epoch": 16.45, - "learning_rate": 9.630102040816326e-05, - "loss": 1.1064, + "epoch": 9.18, + "learning_rate": 8.451704545454546e-05, + "loss": 0.4988, "step": 813 }, { - "epoch": 16.47, - "learning_rate": 9.617346938775511e-05, - "loss": 1.1472, + "epoch": 9.2, + "learning_rate": 8.4375e-05, + "loss": 0.5273, "step": 814 }, { - "epoch": 16.49, - "learning_rate": 9.604591836734694e-05, - "loss": 1.1577, + "epoch": 9.21, + "learning_rate": 8.423295454545455e-05, + "loss": 0.5332, "step": 815 }, { - "epoch": 16.51, - "learning_rate": 9.591836734693878e-05, - "loss": 1.1436, + "epoch": 9.22, + "learning_rate": 8.40909090909091e-05, + "loss": 0.5181, "step": 816 }, { - "epoch": 16.53, - "learning_rate": 9.579081632653061e-05, - "loss": 1.1657, + "epoch": 9.23, + "learning_rate": 8.394886363636364e-05, + "loss": 0.5085, "step": 817 }, { - "epoch": 16.55, - "learning_rate": 9.566326530612246e-05, - "loss": 1.1147, + "epoch": 9.24, + "learning_rate": 8.380681818181818e-05, + "loss": 0.5137, "step": 818 }, { - "epoch": 16.57, - "learning_rate": 9.553571428571429e-05, - "loss": 1.1839, + "epoch": 9.25, + "learning_rate": 8.366477272727273e-05, + "loss": 0.5195, "step": 819 }, { - "epoch": 16.59, - "learning_rate": 9.540816326530613e-05, - "loss": 1.1298, + "epoch": 9.26, + "learning_rate": 8.352272727272727e-05, + "loss": 0.5077, "step": 820 }, { - "epoch": 16.61, - "learning_rate": 9.528061224489796e-05, - "loss": 1.2141, + "epoch": 9.28, + "learning_rate": 8.338068181818183e-05, + "loss": 0.5074, "step": 821 }, { - "epoch": 16.63, - "learning_rate": 9.515306122448981e-05, - "loss": 1.2045, + "epoch": 9.29, + "learning_rate": 8.323863636363637e-05, + "loss": 0.5142, "step": 822 }, { - "epoch": 16.65, - "learning_rate": 9.502551020408164e-05, - "loss": 1.1791, + "epoch": 9.3, + "learning_rate": 8.309659090909091e-05, + "loss": 0.5116, "step": 823 }, { - "epoch": 16.67, - "learning_rate": 9.489795918367348e-05, - "loss": 1.1137, + "epoch": 9.31, + "learning_rate": 8.295454545454547e-05, + "loss": 0.4974, "step": 824 }, { - "epoch": 16.69, - "learning_rate": 9.477040816326531e-05, - "loss": 1.1312, + "epoch": 9.32, + "learning_rate": 8.28125e-05, + "loss": 0.5117, "step": 825 }, { - "epoch": 16.71, - "learning_rate": 9.464285714285715e-05, - "loss": 1.1102, + "epoch": 9.33, + "learning_rate": 8.267045454545455e-05, + "loss": 0.5114, "step": 826 }, { - "epoch": 16.73, - "learning_rate": 9.451530612244899e-05, - "loss": 1.1865, + "epoch": 9.34, + "learning_rate": 8.25284090909091e-05, + "loss": 0.5039, "step": 827 }, { - "epoch": 16.75, - "learning_rate": 9.438775510204082e-05, - "loss": 1.1232, + "epoch": 9.35, + "learning_rate": 8.238636363636364e-05, + "loss": 0.498, "step": 828 }, { - "epoch": 16.77, - "learning_rate": 9.426020408163265e-05, - "loss": 1.2068, + "epoch": 9.37, + "learning_rate": 8.224431818181818e-05, + "loss": 0.5042, "step": 829 }, { - "epoch": 16.79, - "learning_rate": 9.41326530612245e-05, - "loss": 1.1864, + "epoch": 9.38, + "learning_rate": 8.210227272727274e-05, + "loss": 0.5049, "step": 830 }, { - "epoch": 16.81, - "learning_rate": 9.400510204081633e-05, - "loss": 1.2195, + "epoch": 9.39, + "learning_rate": 8.196022727272727e-05, + "loss": 0.5123, "step": 831 }, { - "epoch": 16.83, - "learning_rate": 9.387755102040817e-05, - "loss": 1.2063, + "epoch": 9.4, + "learning_rate": 8.181818181818183e-05, + "loss": 0.4907, "step": 832 }, { - "epoch": 16.85, - "learning_rate": 9.375e-05, - "loss": 1.1455, + "epoch": 9.41, + "learning_rate": 8.167613636363637e-05, + "loss": 0.5267, "step": 833 }, { - "epoch": 16.88, - "learning_rate": 9.362244897959183e-05, - "loss": 1.1819, + "epoch": 9.42, + "learning_rate": 8.15340909090909e-05, + "loss": 0.5314, "step": 834 }, { - "epoch": 16.9, - "learning_rate": 9.349489795918368e-05, - "loss": 1.1887, + "epoch": 9.43, + "learning_rate": 8.139204545454546e-05, + "loss": 0.4952, "step": 835 }, { - "epoch": 16.92, - "learning_rate": 9.336734693877551e-05, - "loss": 1.1557, + "epoch": 9.44, + "learning_rate": 8.125000000000001e-05, + "loss": 0.5014, "step": 836 }, { - "epoch": 16.94, - "learning_rate": 9.323979591836735e-05, - "loss": 1.2094, + "epoch": 9.46, + "learning_rate": 8.110795454545454e-05, + "loss": 0.4967, "step": 837 }, { - "epoch": 16.96, - "learning_rate": 9.311224489795918e-05, - "loss": 1.1512, + "epoch": 9.47, + "learning_rate": 8.09659090909091e-05, + "loss": 0.5116, "step": 838 }, { - "epoch": 16.98, - "learning_rate": 9.298469387755103e-05, - "loss": 1.1463, + "epoch": 9.48, + "learning_rate": 8.082386363636365e-05, + "loss": 0.5119, "step": 839 }, { - "epoch": 17.0, - "learning_rate": 9.285714285714286e-05, - "loss": 1.155, + "epoch": 9.49, + "learning_rate": 8.068181818181818e-05, + "loss": 0.4987, "step": 840 }, { - "epoch": 17.02, - "learning_rate": 9.27295918367347e-05, - "loss": 1.1292, + "epoch": 9.5, + "learning_rate": 8.053977272727274e-05, + "loss": 0.5063, "step": 841 }, { - "epoch": 17.04, - "learning_rate": 9.260204081632653e-05, - "loss": 1.0996, + "epoch": 9.51, + "learning_rate": 8.039772727272728e-05, + "loss": 0.5019, "step": 842 }, { - "epoch": 17.06, - "learning_rate": 9.247448979591838e-05, - "loss": 1.0662, + "epoch": 9.52, + "learning_rate": 8.025568181818183e-05, + "loss": 0.5272, "step": 843 }, { - "epoch": 17.08, - "learning_rate": 9.234693877551021e-05, - "loss": 1.0931, + "epoch": 9.54, + "learning_rate": 8.011363636363637e-05, + "loss": 0.4969, "step": 844 }, { - "epoch": 17.1, - "learning_rate": 9.221938775510205e-05, - "loss": 1.0727, + "epoch": 9.55, + "learning_rate": 7.997159090909092e-05, + "loss": 0.5222, "step": 845 }, { - "epoch": 17.12, - "learning_rate": 9.209183673469388e-05, - "loss": 1.1043, + "epoch": 9.56, + "learning_rate": 7.982954545454546e-05, + "loss": 0.4729, "step": 846 }, { - "epoch": 17.14, - "learning_rate": 9.196428571428572e-05, - "loss": 1.0594, + "epoch": 9.57, + "learning_rate": 7.96875e-05, + "loss": 0.4976, "step": 847 }, { - "epoch": 17.16, - "learning_rate": 9.183673469387756e-05, - "loss": 1.0952, + "epoch": 9.58, + "learning_rate": 7.954545454545455e-05, + "loss": 0.4974, "step": 848 }, { - "epoch": 17.18, - "learning_rate": 9.170918367346939e-05, - "loss": 1.0639, + "epoch": 9.59, + "learning_rate": 7.94034090909091e-05, + "loss": 0.4849, "step": 849 }, { - "epoch": 17.2, - "learning_rate": 9.158163265306124e-05, - "loss": 1.132, + "epoch": 9.6, + "learning_rate": 7.926136363636364e-05, + "loss": 0.4897, "step": 850 }, { - "epoch": 17.22, - "learning_rate": 9.145408163265307e-05, - "loss": 1.1083, + "epoch": 9.61, + "learning_rate": 7.911931818181819e-05, + "loss": 0.4962, "step": 851 }, { - "epoch": 17.24, - "learning_rate": 9.13265306122449e-05, - "loss": 1.1282, + "epoch": 9.63, + "learning_rate": 7.897727272727273e-05, + "loss": 0.4877, "step": 852 }, { - "epoch": 17.26, - "learning_rate": 9.119897959183674e-05, - "loss": 1.0474, + "epoch": 9.64, + "learning_rate": 7.883522727272728e-05, + "loss": 0.4921, "step": 853 }, { - "epoch": 17.28, - "learning_rate": 9.107142857142857e-05, - "loss": 1.1138, + "epoch": 9.65, + "learning_rate": 7.869318181818182e-05, + "loss": 0.4969, "step": 854 }, { - "epoch": 17.3, - "learning_rate": 9.094387755102042e-05, - "loss": 1.1025, + "epoch": 9.66, + "learning_rate": 7.855113636363637e-05, + "loss": 0.5045, "step": 855 }, { - "epoch": 17.32, - "learning_rate": 9.081632653061225e-05, - "loss": 1.0968, + "epoch": 9.67, + "learning_rate": 7.840909090909091e-05, + "loss": 0.5207, "step": 856 }, { - "epoch": 17.34, - "learning_rate": 9.068877551020408e-05, - "loss": 1.1683, + "epoch": 9.68, + "learning_rate": 7.826704545454546e-05, + "loss": 0.5098, "step": 857 }, { - "epoch": 17.36, - "learning_rate": 9.056122448979592e-05, - "loss": 1.0975, + "epoch": 9.69, + "learning_rate": 7.8125e-05, + "loss": 0.5005, "step": 858 }, { - "epoch": 17.38, - "learning_rate": 9.043367346938775e-05, - "loss": 1.1274, + "epoch": 9.7, + "learning_rate": 7.798295454545455e-05, + "loss": 0.5028, "step": 859 }, { - "epoch": 17.4, - "learning_rate": 9.030612244897958e-05, - "loss": 1.0916, + "epoch": 9.72, + "learning_rate": 7.784090909090909e-05, + "loss": 0.5067, "step": 860 }, { - "epoch": 17.42, - "learning_rate": 9.017857142857143e-05, - "loss": 1.0912, + "epoch": 9.73, + "learning_rate": 7.769886363636364e-05, + "loss": 0.484, "step": 861 }, { - "epoch": 17.44, - "learning_rate": 9.005102040816327e-05, - "loss": 1.0875, + "epoch": 9.74, + "learning_rate": 7.755681818181818e-05, + "loss": 0.5029, "step": 862 }, { - "epoch": 17.46, - "learning_rate": 8.99234693877551e-05, - "loss": 1.05, + "epoch": 9.75, + "learning_rate": 7.741477272727273e-05, + "loss": 0.5077, "step": 863 }, { - "epoch": 17.48, - "learning_rate": 8.979591836734695e-05, - "loss": 1.1418, + "epoch": 9.76, + "learning_rate": 7.727272727272727e-05, + "loss": 0.5091, "step": 864 }, { - "epoch": 17.5, - "learning_rate": 8.966836734693878e-05, - "loss": 1.0609, + "epoch": 9.77, + "learning_rate": 7.713068181818183e-05, + "loss": 0.4781, "step": 865 }, { - "epoch": 17.52, - "learning_rate": 8.954081632653062e-05, - "loss": 1.1611, + "epoch": 9.78, + "learning_rate": 7.698863636363636e-05, + "loss": 0.5124, "step": 866 }, { - "epoch": 17.54, - "learning_rate": 8.941326530612245e-05, - "loss": 1.1065, + "epoch": 9.79, + "learning_rate": 7.684659090909091e-05, + "loss": 0.4859, "step": 867 }, { - "epoch": 17.56, - "learning_rate": 8.92857142857143e-05, - "loss": 1.1611, + "epoch": 9.81, + "learning_rate": 7.670454545454547e-05, + "loss": 0.4872, "step": 868 }, { - "epoch": 17.58, - "learning_rate": 8.915816326530613e-05, - "loss": 1.1398, + "epoch": 9.82, + "learning_rate": 7.65625e-05, + "loss": 0.4675, "step": 869 }, { - "epoch": 17.6, - "learning_rate": 8.903061224489796e-05, - "loss": 1.1055, + "epoch": 9.83, + "learning_rate": 7.642045454545454e-05, + "loss": 0.5056, "step": 870 }, { - "epoch": 17.62, - "learning_rate": 8.89030612244898e-05, - "loss": 1.1314, + "epoch": 9.84, + "learning_rate": 7.62784090909091e-05, + "loss": 0.4868, "step": 871 }, { - "epoch": 17.64, - "learning_rate": 8.877551020408164e-05, - "loss": 1.1084, + "epoch": 9.85, + "learning_rate": 7.613636363636363e-05, + "loss": 0.4907, "step": 872 }, { - "epoch": 17.66, - "learning_rate": 8.864795918367348e-05, - "loss": 1.1254, + "epoch": 9.86, + "learning_rate": 7.599431818181818e-05, + "loss": 0.474, "step": 873 }, { - "epoch": 17.68, - "learning_rate": 8.852040816326531e-05, - "loss": 1.142, + "epoch": 9.87, + "learning_rate": 7.585227272727274e-05, + "loss": 0.4813, "step": 874 }, { - "epoch": 17.7, - "learning_rate": 8.839285714285714e-05, - "loss": 1.1371, + "epoch": 9.89, + "learning_rate": 7.571022727272727e-05, + "loss": 0.4838, "step": 875 }, { - "epoch": 17.72, - "learning_rate": 8.826530612244899e-05, - "loss": 1.1092, + "epoch": 9.9, + "learning_rate": 7.556818181818183e-05, + "loss": 0.4935, "step": 876 }, { - "epoch": 17.75, - "learning_rate": 8.813775510204082e-05, - "loss": 1.161, + "epoch": 9.91, + "learning_rate": 7.542613636363637e-05, + "loss": 0.4884, "step": 877 }, { - "epoch": 17.77, - "learning_rate": 8.801020408163265e-05, - "loss": 1.1044, + "epoch": 9.92, + "learning_rate": 7.52840909090909e-05, + "loss": 0.4797, "step": 878 }, { - "epoch": 17.79, - "learning_rate": 8.788265306122449e-05, - "loss": 1.117, + "epoch": 9.93, + "learning_rate": 7.514204545454546e-05, + "loss": 0.479, "step": 879 }, { - "epoch": 17.81, - "learning_rate": 8.775510204081632e-05, - "loss": 1.1262, + "epoch": 9.94, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4727, "step": 880 }, { - "epoch": 17.83, - "learning_rate": 8.762755102040817e-05, - "loss": 1.0829, + "epoch": 9.95, + "learning_rate": 7.485795454545454e-05, + "loss": 0.4758, "step": 881 }, { - "epoch": 17.85, - "learning_rate": 8.75e-05, - "loss": 1.1393, + "epoch": 9.96, + "learning_rate": 7.47159090909091e-05, + "loss": 0.482, "step": 882 }, { - "epoch": 17.87, - "learning_rate": 8.737244897959183e-05, - "loss": 1.1781, + "epoch": 9.98, + "learning_rate": 7.457386363636364e-05, + "loss": 0.4951, "step": 883 }, { - "epoch": 17.89, - "learning_rate": 8.724489795918367e-05, - "loss": 1.1582, + "epoch": 9.99, + "learning_rate": 7.443181818181817e-05, + "loss": 0.4823, "step": 884 }, { - "epoch": 17.91, - "learning_rate": 8.711734693877552e-05, - "loss": 1.1469, + "epoch": 10.0, + "learning_rate": 7.428977272727273e-05, + "loss": 0.4638, "step": 885 }, { - "epoch": 17.93, - "learning_rate": 8.698979591836735e-05, - "loss": 1.1494, + "epoch": 10.01, + "learning_rate": 7.414772727272728e-05, + "loss": 0.4715, "step": 886 }, { - "epoch": 17.95, - "learning_rate": 8.68622448979592e-05, - "loss": 1.1251, + "epoch": 10.02, + "learning_rate": 7.400568181818182e-05, + "loss": 0.461, "step": 887 }, { - "epoch": 17.97, - "learning_rate": 8.673469387755102e-05, - "loss": 1.1624, + "epoch": 10.03, + "learning_rate": 7.386363636363637e-05, + "loss": 0.4429, "step": 888 }, { - "epoch": 17.99, - "learning_rate": 8.660714285714287e-05, - "loss": 1.0842, + "epoch": 10.04, + "learning_rate": 7.372159090909091e-05, + "loss": 0.4403, "step": 889 }, { - "epoch": 18.01, - "learning_rate": 8.64795918367347e-05, - "loss": 1.1944, + "epoch": 10.05, + "learning_rate": 7.357954545454546e-05, + "loss": 0.4519, "step": 890 }, { - "epoch": 18.03, - "learning_rate": 8.635204081632653e-05, - "loss": 1.0642, + "epoch": 10.07, + "learning_rate": 7.34375e-05, + "loss": 0.4611, "step": 891 }, { - "epoch": 18.05, - "learning_rate": 8.622448979591838e-05, - "loss": 1.0459, + "epoch": 10.08, + "learning_rate": 7.329545454545455e-05, + "loss": 0.4543, "step": 892 }, { - "epoch": 18.07, - "learning_rate": 8.60969387755102e-05, - "loss": 1.0941, + "epoch": 10.09, + "learning_rate": 7.315340909090909e-05, + "loss": 0.4528, "step": 893 }, { - "epoch": 18.09, - "learning_rate": 8.596938775510205e-05, - "loss": 1.0457, + "epoch": 10.1, + "learning_rate": 7.301136363636364e-05, + "loss": 0.4586, "step": 894 }, { - "epoch": 18.11, - "learning_rate": 8.584183673469388e-05, - "loss": 1.1033, + "epoch": 10.11, + "learning_rate": 7.286931818181818e-05, + "loss": 0.4418, "step": 895 }, { - "epoch": 18.13, - "learning_rate": 8.571428571428571e-05, - "loss": 1.0756, + "epoch": 10.12, + "learning_rate": 7.272727272727273e-05, + "loss": 0.4435, "step": 896 }, { - "epoch": 18.15, - "learning_rate": 8.558673469387756e-05, - "loss": 1.0615, + "epoch": 10.13, + "learning_rate": 7.258522727272727e-05, + "loss": 0.44, "step": 897 }, { - "epoch": 18.17, - "learning_rate": 8.545918367346939e-05, - "loss": 1.0828, + "epoch": 10.15, + "learning_rate": 7.244318181818183e-05, + "loss": 0.4589, "step": 898 }, { - "epoch": 18.19, - "learning_rate": 8.533163265306123e-05, - "loss": 1.1158, + "epoch": 10.16, + "learning_rate": 7.230113636363636e-05, + "loss": 0.4597, "step": 899 }, { - "epoch": 18.21, - "learning_rate": 8.520408163265306e-05, - "loss": 1.0133, + "epoch": 10.17, + "learning_rate": 7.215909090909091e-05, + "loss": 0.4479, "step": 900 }, { - "epoch": 18.23, - "learning_rate": 8.50765306122449e-05, - "loss": 1.0437, + "epoch": 10.18, + "learning_rate": 7.201704545454547e-05, + "loss": 0.4477, "step": 901 }, { - "epoch": 18.25, - "learning_rate": 8.494897959183674e-05, - "loss": 1.0372, + "epoch": 10.19, + "learning_rate": 7.1875e-05, + "loss": 0.446, "step": 902 }, { - "epoch": 18.27, - "learning_rate": 8.482142857142857e-05, - "loss": 1.1012, + "epoch": 10.2, + "learning_rate": 7.173295454545454e-05, + "loss": 0.4546, "step": 903 }, { - "epoch": 18.29, - "learning_rate": 8.469387755102041e-05, - "loss": 1.0777, + "epoch": 10.21, + "learning_rate": 7.15909090909091e-05, + "loss": 0.4347, "step": 904 }, { - "epoch": 18.31, - "learning_rate": 8.456632653061224e-05, - "loss": 1.0799, + "epoch": 10.22, + "learning_rate": 7.144886363636363e-05, + "loss": 0.452, "step": 905 }, { - "epoch": 18.33, - "learning_rate": 8.443877551020409e-05, - "loss": 0.9846, + "epoch": 10.24, + "learning_rate": 7.130681818181818e-05, + "loss": 0.4536, "step": 906 }, { - "epoch": 18.35, - "learning_rate": 8.431122448979592e-05, - "loss": 1.1, + "epoch": 10.25, + "learning_rate": 7.116477272727274e-05, + "loss": 0.4492, "step": 907 }, { - "epoch": 18.37, - "learning_rate": 8.418367346938776e-05, - "loss": 1.0787, + "epoch": 10.26, + "learning_rate": 7.102272727272727e-05, + "loss": 0.4401, "step": 908 }, { - "epoch": 18.39, - "learning_rate": 8.40561224489796e-05, - "loss": 1.0647, + "epoch": 10.27, + "learning_rate": 7.088068181818183e-05, + "loss": 0.4609, "step": 909 }, { - "epoch": 18.41, - "learning_rate": 8.392857142857144e-05, - "loss": 1.056, + "epoch": 10.28, + "learning_rate": 7.073863636363637e-05, + "loss": 0.4544, "step": 910 }, { - "epoch": 18.43, - "learning_rate": 8.380102040816327e-05, - "loss": 1.1131, + "epoch": 10.29, + "learning_rate": 7.05965909090909e-05, + "loss": 0.4477, "step": 911 }, { - "epoch": 18.45, - "learning_rate": 8.367346938775511e-05, - "loss": 1.0825, + "epoch": 10.3, + "learning_rate": 7.045454545454546e-05, + "loss": 0.4445, "step": 912 }, { - "epoch": 18.47, - "learning_rate": 8.354591836734695e-05, - "loss": 1.0681, + "epoch": 10.31, + "learning_rate": 7.031250000000001e-05, + "loss": 0.4544, "step": 913 }, { - "epoch": 18.49, - "learning_rate": 8.341836734693878e-05, - "loss": 1.0479, + "epoch": 10.33, + "learning_rate": 7.017045454545454e-05, + "loss": 0.4634, "step": 914 }, { - "epoch": 18.51, - "learning_rate": 8.329081632653062e-05, - "loss": 1.0921, + "epoch": 10.34, + "learning_rate": 7.00284090909091e-05, + "loss": 0.4499, "step": 915 }, { - "epoch": 18.53, - "learning_rate": 8.316326530612245e-05, - "loss": 1.0626, + "epoch": 10.35, + "learning_rate": 6.988636363636364e-05, + "loss": 0.4354, "step": 916 }, { - "epoch": 18.55, - "learning_rate": 8.30357142857143e-05, - "loss": 1.0518, + "epoch": 10.36, + "learning_rate": 6.974431818181818e-05, + "loss": 0.454, "step": 917 }, { - "epoch": 18.57, - "learning_rate": 8.290816326530613e-05, - "loss": 1.0557, + "epoch": 10.37, + "learning_rate": 6.960227272727273e-05, + "loss": 0.4473, "step": 918 }, { - "epoch": 18.6, - "learning_rate": 8.278061224489796e-05, - "loss": 1.0831, + "epoch": 10.38, + "learning_rate": 6.946022727272728e-05, + "loss": 0.4347, "step": 919 }, { - "epoch": 18.62, - "learning_rate": 8.26530612244898e-05, - "loss": 1.0307, + "epoch": 10.39, + "learning_rate": 6.931818181818182e-05, + "loss": 0.441, "step": 920 }, { - "epoch": 18.64, - "learning_rate": 8.252551020408163e-05, - "loss": 1.0455, + "epoch": 10.4, + "learning_rate": 6.917613636363637e-05, + "loss": 0.4545, "step": 921 }, { - "epoch": 18.66, - "learning_rate": 8.239795918367348e-05, - "loss": 1.0667, + "epoch": 10.42, + "learning_rate": 6.903409090909091e-05, + "loss": 0.458, "step": 922 }, { - "epoch": 18.68, - "learning_rate": 8.227040816326531e-05, - "loss": 1.0736, + "epoch": 10.43, + "learning_rate": 6.889204545454546e-05, + "loss": 0.4381, "step": 923 }, { - "epoch": 18.7, - "learning_rate": 8.214285714285714e-05, - "loss": 1.0108, + "epoch": 10.44, + "learning_rate": 6.875e-05, + "loss": 0.441, "step": 924 }, { - "epoch": 18.72, - "learning_rate": 8.201530612244898e-05, - "loss": 1.0458, + "epoch": 10.45, + "learning_rate": 6.860795454545455e-05, + "loss": 0.4446, "step": 925 }, { - "epoch": 18.74, - "learning_rate": 8.188775510204081e-05, - "loss": 1.0852, + "epoch": 10.46, + "learning_rate": 6.84659090909091e-05, + "loss": 0.4548, "step": 926 }, { - "epoch": 18.76, - "learning_rate": 8.176020408163265e-05, - "loss": 1.1207, + "epoch": 10.47, + "learning_rate": 6.832386363636364e-05, + "loss": 0.4404, "step": 927 }, { - "epoch": 18.78, - "learning_rate": 8.163265306122449e-05, - "loss": 1.0914, + "epoch": 10.48, + "learning_rate": 6.818181818181818e-05, + "loss": 0.4446, "step": 928 }, { - "epoch": 18.8, - "learning_rate": 8.150510204081633e-05, - "loss": 1.1108, + "epoch": 10.5, + "learning_rate": 6.803977272727273e-05, + "loss": 0.4434, "step": 929 }, { - "epoch": 18.82, - "learning_rate": 8.137755102040817e-05, - "loss": 1.1394, + "epoch": 10.51, + "learning_rate": 6.789772727272727e-05, + "loss": 0.4778, "step": 930 }, { - "epoch": 18.84, - "learning_rate": 8.125000000000001e-05, - "loss": 1.029, + "epoch": 10.52, + "learning_rate": 6.775568181818182e-05, + "loss": 0.4356, "step": 931 }, { - "epoch": 18.86, - "learning_rate": 8.112244897959184e-05, - "loss": 1.0661, + "epoch": 10.53, + "learning_rate": 6.761363636363636e-05, + "loss": 0.4464, "step": 932 }, { - "epoch": 18.88, - "learning_rate": 8.099489795918369e-05, - "loss": 1.0303, + "epoch": 10.54, + "learning_rate": 6.747159090909091e-05, + "loss": 0.4387, "step": 933 }, { - "epoch": 18.9, - "learning_rate": 8.086734693877552e-05, - "loss": 1.1144, + "epoch": 10.55, + "learning_rate": 6.732954545454547e-05, + "loss": 0.456, "step": 934 }, { - "epoch": 18.92, - "learning_rate": 8.073979591836736e-05, - "loss": 1.1096, + "epoch": 10.56, + "learning_rate": 6.71875e-05, + "loss": 0.453, "step": 935 }, { - "epoch": 18.94, - "learning_rate": 8.061224489795919e-05, - "loss": 1.123, + "epoch": 10.57, + "learning_rate": 6.704545454545455e-05, + "loss": 0.4611, "step": 936 }, { - "epoch": 18.96, - "learning_rate": 8.048469387755102e-05, - "loss": 1.1002, + "epoch": 10.59, + "learning_rate": 6.69034090909091e-05, + "loss": 0.4354, "step": 937 }, { - "epoch": 18.98, - "learning_rate": 8.035714285714287e-05, - "loss": 1.1016, + "epoch": 10.6, + "learning_rate": 6.676136363636364e-05, + "loss": 0.4519, "step": 938 }, { - "epoch": 19.0, - "learning_rate": 8.02295918367347e-05, - "loss": 1.0847, + "epoch": 10.61, + "learning_rate": 6.661931818181818e-05, + "loss": 0.4435, "step": 939 }, { - "epoch": 19.02, - "learning_rate": 8.010204081632653e-05, - "loss": 1.1029, + "epoch": 10.62, + "learning_rate": 6.647727272727274e-05, + "loss": 0.4422, "step": 940 }, { - "epoch": 19.04, - "learning_rate": 7.997448979591837e-05, - "loss": 1.041, + "epoch": 10.63, + "learning_rate": 6.633522727272727e-05, + "loss": 0.4344, "step": 941 }, { - "epoch": 19.06, - "learning_rate": 7.98469387755102e-05, - "loss": 1.01, + "epoch": 10.64, + "learning_rate": 6.619318181818183e-05, + "loss": 0.4419, "step": 942 }, { - "epoch": 19.08, - "learning_rate": 7.971938775510205e-05, - "loss": 1.0197, + "epoch": 10.65, + "learning_rate": 6.605113636363637e-05, + "loss": 0.4308, "step": 943 }, { - "epoch": 19.1, - "learning_rate": 7.959183673469388e-05, - "loss": 1.0543, + "epoch": 10.66, + "learning_rate": 6.59090909090909e-05, + "loss": 0.4043, "step": 944 }, { - "epoch": 19.12, - "learning_rate": 7.946428571428571e-05, - "loss": 1.0369, + "epoch": 10.68, + "learning_rate": 6.576704545454546e-05, + "loss": 0.4626, "step": 945 }, { - "epoch": 19.14, - "learning_rate": 7.933673469387755e-05, - "loss": 1.0154, + "epoch": 10.69, + "learning_rate": 6.562500000000001e-05, + "loss": 0.4365, "step": 946 }, { - "epoch": 19.16, - "learning_rate": 7.920918367346939e-05, - "loss": 0.9546, + "epoch": 10.7, + "learning_rate": 6.548295454545454e-05, + "loss": 0.4397, "step": 947 }, { - "epoch": 19.18, - "learning_rate": 7.908163265306123e-05, - "loss": 0.9982, + "epoch": 10.71, + "learning_rate": 6.53409090909091e-05, + "loss": 0.4463, "step": 948 }, { - "epoch": 19.2, - "learning_rate": 7.895408163265306e-05, - "loss": 1.0748, + "epoch": 10.72, + "learning_rate": 6.519886363636364e-05, + "loss": 0.4394, "step": 949 }, { - "epoch": 19.22, - "learning_rate": 7.882653061224489e-05, - "loss": 1.0562, + "epoch": 10.73, + "learning_rate": 6.505681818181818e-05, + "loss": 0.45, "step": 950 }, { - "epoch": 19.24, - "learning_rate": 7.869897959183674e-05, - "loss": 1.0352, + "epoch": 10.74, + "learning_rate": 6.491477272727273e-05, + "loss": 0.4363, "step": 951 }, { - "epoch": 19.26, - "learning_rate": 7.857142857142858e-05, - "loss": 0.9976, + "epoch": 10.76, + "learning_rate": 6.477272727272728e-05, + "loss": 0.4566, "step": 952 }, { - "epoch": 19.28, - "learning_rate": 7.844387755102041e-05, - "loss": 1.0221, + "epoch": 10.77, + "learning_rate": 6.463068181818183e-05, + "loss": 0.4235, "step": 953 }, { - "epoch": 19.3, - "learning_rate": 7.831632653061226e-05, - "loss": 1.0119, + "epoch": 10.78, + "learning_rate": 6.448863636363637e-05, + "loss": 0.4458, "step": 954 }, { - "epoch": 19.32, - "learning_rate": 7.818877551020409e-05, - "loss": 1.0657, + "epoch": 10.79, + "learning_rate": 6.434659090909092e-05, + "loss": 0.423, "step": 955 }, { - "epoch": 19.34, - "learning_rate": 7.806122448979593e-05, - "loss": 0.9591, + "epoch": 10.8, + "learning_rate": 6.420454545454546e-05, + "loss": 0.445, "step": 956 }, { - "epoch": 19.36, - "learning_rate": 7.793367346938776e-05, - "loss": 1.0101, + "epoch": 10.81, + "learning_rate": 6.40625e-05, + "loss": 0.424, "step": 957 }, { - "epoch": 19.38, - "learning_rate": 7.780612244897959e-05, - "loss": 1.0453, + "epoch": 10.82, + "learning_rate": 6.392045454545455e-05, + "loss": 0.4224, "step": 958 }, { - "epoch": 19.4, - "learning_rate": 7.767857142857144e-05, - "loss": 1.0461, + "epoch": 10.83, + "learning_rate": 6.37784090909091e-05, + "loss": 0.4223, "step": 959 }, { - "epoch": 19.42, - "learning_rate": 7.755102040816327e-05, - "loss": 1.0959, + "epoch": 10.85, + "learning_rate": 6.363636363636364e-05, + "loss": 0.4314, "step": 960 }, { - "epoch": 19.44, - "learning_rate": 7.742346938775511e-05, - "loss": 1.0608, + "epoch": 10.86, + "learning_rate": 6.349431818181819e-05, + "loss": 0.4488, "step": 961 }, { - "epoch": 19.47, - "learning_rate": 7.729591836734694e-05, - "loss": 1.1177, + "epoch": 10.87, + "learning_rate": 6.335227272727273e-05, + "loss": 0.423, "step": 962 }, { - "epoch": 19.49, - "learning_rate": 7.716836734693877e-05, - "loss": 1.0354, + "epoch": 10.88, + "learning_rate": 6.321022727272728e-05, + "loss": 0.4416, "step": 963 }, { - "epoch": 19.51, - "learning_rate": 7.704081632653062e-05, - "loss": 1.0507, + "epoch": 10.89, + "learning_rate": 6.306818181818182e-05, + "loss": 0.423, "step": 964 }, { - "epoch": 19.53, - "learning_rate": 7.691326530612245e-05, - "loss": 1.0313, + "epoch": 10.9, + "learning_rate": 6.292613636363637e-05, + "loss": 0.4502, "step": 965 }, { - "epoch": 19.55, - "learning_rate": 7.67857142857143e-05, - "loss": 1.0569, + "epoch": 10.91, + "learning_rate": 6.278409090909091e-05, + "loss": 0.4266, "step": 966 }, { - "epoch": 19.57, - "learning_rate": 7.665816326530612e-05, - "loss": 1.0862, + "epoch": 10.92, + "learning_rate": 6.264204545454546e-05, + "loss": 0.4344, "step": 967 }, { - "epoch": 19.59, - "learning_rate": 7.653061224489796e-05, - "loss": 1.0593, + "epoch": 10.94, + "learning_rate": 6.25e-05, + "loss": 0.434, "step": 968 }, { - "epoch": 19.61, - "learning_rate": 7.64030612244898e-05, - "loss": 1.0602, + "epoch": 10.95, + "learning_rate": 6.235795454545455e-05, + "loss": 0.4269, "step": 969 }, { - "epoch": 19.63, - "learning_rate": 7.627551020408163e-05, - "loss": 1.0048, + "epoch": 10.96, + "learning_rate": 6.221590909090909e-05, + "loss": 0.4158, "step": 970 }, { - "epoch": 19.65, - "learning_rate": 7.614795918367347e-05, - "loss": 1.0346, + "epoch": 10.97, + "learning_rate": 6.207386363636364e-05, + "loss": 0.4231, "step": 971 }, { - "epoch": 19.67, - "learning_rate": 7.60204081632653e-05, - "loss": 1.0172, + "epoch": 10.98, + "learning_rate": 6.193181818181818e-05, + "loss": 0.4235, "step": 972 }, { - "epoch": 19.69, - "learning_rate": 7.589285714285714e-05, - "loss": 1.02, + "epoch": 10.99, + "learning_rate": 6.178977272727273e-05, + "loss": 0.4504, "step": 973 }, { - "epoch": 19.71, - "learning_rate": 7.576530612244898e-05, - "loss": 1.0028, + "epoch": 11.0, + "learning_rate": 6.164772727272727e-05, + "loss": 0.4394, "step": 974 }, { - "epoch": 19.73, - "learning_rate": 7.563775510204083e-05, - "loss": 1.08, + "epoch": 11.02, + "learning_rate": 6.150568181818183e-05, + "loss": 0.4333, "step": 975 }, { - "epoch": 19.75, - "learning_rate": 7.551020408163266e-05, - "loss": 1.0402, + "epoch": 11.03, + "learning_rate": 6.136363636363636e-05, + "loss": 0.3936, "step": 976 }, { - "epoch": 19.77, - "learning_rate": 7.53826530612245e-05, - "loss": 1.0567, + "epoch": 11.04, + "learning_rate": 6.122159090909091e-05, + "loss": 0.3933, "step": 977 }, { - "epoch": 19.79, - "learning_rate": 7.525510204081633e-05, - "loss": 1.0169, + "epoch": 11.05, + "learning_rate": 6.107954545454547e-05, + "loss": 0.4161, "step": 978 }, { - "epoch": 19.81, - "learning_rate": 7.512755102040818e-05, - "loss": 0.9881, + "epoch": 11.06, + "learning_rate": 6.0937500000000004e-05, + "loss": 0.4097, "step": 979 }, { - "epoch": 19.83, - "learning_rate": 7.500000000000001e-05, - "loss": 1.0677, + "epoch": 11.07, + "learning_rate": 6.079545454545454e-05, + "loss": 0.412, "step": 980 }, { - "epoch": 19.85, - "learning_rate": 7.487244897959184e-05, - "loss": 1.1026, + "epoch": 11.08, + "learning_rate": 6.0653409090909094e-05, + "loss": 0.4104, "step": 981 }, { - "epoch": 19.87, - "learning_rate": 7.474489795918368e-05, - "loss": 1.0101, + "epoch": 11.09, + "learning_rate": 6.051136363636364e-05, + "loss": 0.4152, "step": 982 }, { - "epoch": 19.89, - "learning_rate": 7.461734693877551e-05, - "loss": 1.069, + "epoch": 11.11, + "learning_rate": 6.036931818181818e-05, + "loss": 0.4037, "step": 983 }, { - "epoch": 19.91, - "learning_rate": 7.448979591836736e-05, - "loss": 1.0493, + "epoch": 11.12, + "learning_rate": 6.022727272727273e-05, + "loss": 0.413, "step": 984 }, { - "epoch": 19.93, - "learning_rate": 7.436224489795919e-05, - "loss": 1.0858, + "epoch": 11.13, + "learning_rate": 6.0085227272727274e-05, + "loss": 0.4413, "step": 985 }, { - "epoch": 19.95, - "learning_rate": 7.423469387755102e-05, - "loss": 1.0734, + "epoch": 11.14, + "learning_rate": 5.9943181818181826e-05, + "loss": 0.3908, "step": 986 }, { - "epoch": 19.97, - "learning_rate": 7.410714285714286e-05, - "loss": 1.0203, + "epoch": 11.15, + "learning_rate": 5.9801136363636365e-05, + "loss": 0.3982, "step": 987 }, { - "epoch": 19.99, - "learning_rate": 7.39795918367347e-05, - "loss": 1.0285, + "epoch": 11.16, + "learning_rate": 5.965909090909091e-05, + "loss": 0.4109, "step": 988 }, { - "epoch": 20.01, - "learning_rate": 7.385204081632653e-05, - "loss": 0.9446, + "epoch": 11.17, + "learning_rate": 5.951704545454546e-05, + "loss": 0.3923, "step": 989 }, { - "epoch": 20.03, - "learning_rate": 7.372448979591837e-05, - "loss": 0.9915, + "epoch": 11.18, + "learning_rate": 5.9375e-05, + "loss": 0.4107, "step": 990 }, { - "epoch": 20.05, - "learning_rate": 7.35969387755102e-05, - "loss": 0.9882, + "epoch": 11.2, + "learning_rate": 5.9232954545454545e-05, + "loss": 0.4099, "step": 991 }, { - "epoch": 20.07, - "learning_rate": 7.346938775510205e-05, - "loss": 0.9338, + "epoch": 11.21, + "learning_rate": 5.90909090909091e-05, + "loss": 0.4163, "step": 992 }, { - "epoch": 20.09, - "learning_rate": 7.334183673469388e-05, - "loss": 0.942, + "epoch": 11.22, + "learning_rate": 5.8948863636363635e-05, + "loss": 0.4189, "step": 993 }, { - "epoch": 20.11, - "learning_rate": 7.321428571428571e-05, - "loss": 0.9725, + "epoch": 11.23, + "learning_rate": 5.880681818181818e-05, + "loss": 0.3889, "step": 994 }, { - "epoch": 20.13, - "learning_rate": 7.308673469387755e-05, - "loss": 1.027, + "epoch": 11.24, + "learning_rate": 5.866477272727273e-05, + "loss": 0.3988, "step": 995 }, { - "epoch": 20.15, - "learning_rate": 7.29591836734694e-05, - "loss": 1.0081, + "epoch": 11.25, + "learning_rate": 5.852272727272727e-05, + "loss": 0.4215, "step": 996 }, { - "epoch": 20.17, - "learning_rate": 7.283163265306123e-05, - "loss": 1.0117, + "epoch": 11.26, + "learning_rate": 5.838068181818183e-05, + "loss": 0.4207, "step": 997 }, { - "epoch": 20.19, - "learning_rate": 7.270408163265307e-05, - "loss": 0.969, + "epoch": 11.27, + "learning_rate": 5.823863636363637e-05, + "loss": 0.413, "step": 998 }, { - "epoch": 20.21, - "learning_rate": 7.25765306122449e-05, - "loss": 1.0024, + "epoch": 11.29, + "learning_rate": 5.8096590909090906e-05, + "loss": 0.4057, "step": 999 }, { - "epoch": 20.23, - "learning_rate": 7.244897959183675e-05, - "loss": 0.994, + "epoch": 11.3, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.3939, "step": 1000 }, { - "epoch": 20.25, - "learning_rate": 7.232142857142858e-05, - "loss": 1.0248, + "epoch": 11.31, + "learning_rate": 5.78125e-05, + "loss": 0.4199, "step": 1001 }, { - "epoch": 20.27, - "learning_rate": 7.219387755102042e-05, - "loss": 1.0493, + "epoch": 11.32, + "learning_rate": 5.767045454545454e-05, + "loss": 0.4076, "step": 1002 }, { - "epoch": 20.29, - "learning_rate": 7.206632653061225e-05, - "loss": 1.0011, + "epoch": 11.33, + "learning_rate": 5.75284090909091e-05, + "loss": 0.4079, "step": 1003 }, { - "epoch": 20.31, - "learning_rate": 7.193877551020408e-05, - "loss": 0.9874, + "epoch": 11.34, + "learning_rate": 5.738636363636364e-05, + "loss": 0.4002, "step": 1004 }, { - "epoch": 20.34, - "learning_rate": 7.181122448979593e-05, - "loss": 1.0049, + "epoch": 11.35, + "learning_rate": 5.724431818181818e-05, + "loss": 0.3801, "step": 1005 }, { - "epoch": 20.36, - "learning_rate": 7.168367346938776e-05, - "loss": 1.0314, + "epoch": 11.37, + "learning_rate": 5.7102272727272735e-05, + "loss": 0.3939, "step": 1006 }, { - "epoch": 20.38, - "learning_rate": 7.155612244897959e-05, - "loss": 0.9742, + "epoch": 11.38, + "learning_rate": 5.696022727272727e-05, + "loss": 0.3904, "step": 1007 }, { - "epoch": 20.4, - "learning_rate": 7.142857142857143e-05, - "loss": 1.0621, + "epoch": 11.39, + "learning_rate": 5.6818181818181825e-05, + "loss": 0.406, "step": 1008 }, { - "epoch": 20.42, - "learning_rate": 7.130102040816326e-05, - "loss": 0.9672, + "epoch": 11.4, + "learning_rate": 5.667613636363637e-05, + "loss": 0.4185, "step": 1009 }, { - "epoch": 20.44, - "learning_rate": 7.117346938775511e-05, - "loss": 1.0018, + "epoch": 11.41, + "learning_rate": 5.653409090909091e-05, + "loss": 0.3976, "step": 1010 }, { - "epoch": 20.46, - "learning_rate": 7.104591836734694e-05, - "loss": 1.0045, + "epoch": 11.42, + "learning_rate": 5.639204545454546e-05, + "loss": 0.3907, "step": 1011 }, { - "epoch": 20.48, - "learning_rate": 7.091836734693877e-05, - "loss": 0.9675, + "epoch": 11.43, + "learning_rate": 5.6250000000000005e-05, + "loss": 0.4065, "step": 1012 }, { - "epoch": 20.5, - "learning_rate": 7.079081632653062e-05, - "loss": 0.976, + "epoch": 11.44, + "learning_rate": 5.6107954545454544e-05, + "loss": 0.4069, "step": 1013 }, { - "epoch": 20.52, - "learning_rate": 7.066326530612245e-05, - "loss": 1.0523, + "epoch": 11.46, + "learning_rate": 5.5965909090909095e-05, + "loss": 0.3964, "step": 1014 }, { - "epoch": 20.54, - "learning_rate": 7.053571428571429e-05, - "loss": 1.052, + "epoch": 11.47, + "learning_rate": 5.582386363636364e-05, + "loss": 0.3912, "step": 1015 }, { - "epoch": 20.56, - "learning_rate": 7.040816326530612e-05, - "loss": 0.9903, + "epoch": 11.48, + "learning_rate": 5.568181818181818e-05, + "loss": 0.3944, "step": 1016 }, { - "epoch": 20.58, - "learning_rate": 7.028061224489795e-05, - "loss": 1.0337, + "epoch": 11.49, + "learning_rate": 5.553977272727273e-05, + "loss": 0.4197, "step": 1017 }, { - "epoch": 20.6, - "learning_rate": 7.01530612244898e-05, - "loss": 1.1122, + "epoch": 11.5, + "learning_rate": 5.5397727272727276e-05, + "loss": 0.4064, "step": 1018 }, { - "epoch": 20.62, - "learning_rate": 7.002551020408164e-05, - "loss": 1.0133, + "epoch": 11.51, + "learning_rate": 5.525568181818183e-05, + "loss": 0.4054, "step": 1019 }, { - "epoch": 20.64, - "learning_rate": 6.989795918367347e-05, - "loss": 0.9588, + "epoch": 11.52, + "learning_rate": 5.5113636363636366e-05, + "loss": 0.4128, "step": 1020 }, { - "epoch": 20.66, - "learning_rate": 6.977040816326532e-05, - "loss": 0.9892, + "epoch": 11.53, + "learning_rate": 5.497159090909091e-05, + "loss": 0.3976, "step": 1021 }, { - "epoch": 20.68, - "learning_rate": 6.964285714285715e-05, - "loss": 1.025, + "epoch": 11.55, + "learning_rate": 5.482954545454546e-05, + "loss": 0.3863, "step": 1022 }, { - "epoch": 20.7, - "learning_rate": 6.951530612244899e-05, - "loss": 1.0196, + "epoch": 11.56, + "learning_rate": 5.46875e-05, + "loss": 0.3994, "step": 1023 }, { - "epoch": 20.72, - "learning_rate": 6.938775510204082e-05, - "loss": 1.0146, + "epoch": 11.57, + "learning_rate": 5.4545454545454546e-05, + "loss": 0.401, "step": 1024 }, { - "epoch": 20.74, - "learning_rate": 6.926020408163265e-05, - "loss": 1.0656, + "epoch": 11.58, + "learning_rate": 5.44034090909091e-05, + "loss": 0.3948, "step": 1025 }, { - "epoch": 20.76, - "learning_rate": 6.91326530612245e-05, - "loss": 0.9584, + "epoch": 11.59, + "learning_rate": 5.4261363636363636e-05, + "loss": 0.3967, "step": 1026 }, { - "epoch": 20.78, - "learning_rate": 6.900510204081633e-05, - "loss": 0.9877, + "epoch": 11.6, + "learning_rate": 5.411931818181818e-05, + "loss": 0.413, "step": 1027 }, { - "epoch": 20.8, - "learning_rate": 6.887755102040817e-05, - "loss": 1.0607, + "epoch": 11.61, + "learning_rate": 5.397727272727273e-05, + "loss": 0.4032, "step": 1028 }, { - "epoch": 20.82, - "learning_rate": 6.875e-05, - "loss": 0.9969, + "epoch": 11.63, + "learning_rate": 5.383522727272727e-05, + "loss": 0.3905, "step": 1029 }, { - "epoch": 20.84, - "learning_rate": 6.862244897959184e-05, - "loss": 0.9506, + "epoch": 11.64, + "learning_rate": 5.3693181818181823e-05, + "loss": 0.4041, "step": 1030 }, { - "epoch": 20.86, - "learning_rate": 6.849489795918368e-05, - "loss": 1.0576, + "epoch": 11.65, + "learning_rate": 5.355113636363637e-05, + "loss": 0.392, "step": 1031 }, { - "epoch": 20.88, - "learning_rate": 6.836734693877551e-05, - "loss": 1.0094, + "epoch": 11.66, + "learning_rate": 5.340909090909091e-05, + "loss": 0.3942, "step": 1032 }, { - "epoch": 20.9, - "learning_rate": 6.823979591836735e-05, - "loss": 0.9872, + "epoch": 11.67, + "learning_rate": 5.326704545454546e-05, + "loss": 0.3946, "step": 1033 }, { - "epoch": 20.92, - "learning_rate": 6.811224489795919e-05, - "loss": 1.0544, + "epoch": 11.68, + "learning_rate": 5.3125000000000004e-05, + "loss": 0.3989, "step": 1034 }, { - "epoch": 20.94, - "learning_rate": 6.798469387755102e-05, - "loss": 1.0194, + "epoch": 11.69, + "learning_rate": 5.298295454545454e-05, + "loss": 0.4101, "step": 1035 }, { - "epoch": 20.96, - "learning_rate": 6.785714285714286e-05, - "loss": 1.0009, + "epoch": 11.7, + "learning_rate": 5.2840909090909094e-05, + "loss": 0.4033, "step": 1036 }, { - "epoch": 20.98, - "learning_rate": 6.772959183673469e-05, - "loss": 0.9727, + "epoch": 11.72, + "learning_rate": 5.269886363636364e-05, + "loss": 0.3937, "step": 1037 }, { - "epoch": 21.0, - "learning_rate": 6.760204081632652e-05, - "loss": 0.9754, + "epoch": 11.73, + "learning_rate": 5.255681818181818e-05, + "loss": 0.3873, "step": 1038 }, { - "epoch": 21.02, - "learning_rate": 6.747448979591837e-05, - "loss": 0.9953, + "epoch": 11.74, + "learning_rate": 5.241477272727273e-05, + "loss": 0.3922, "step": 1039 }, { - "epoch": 21.04, - "learning_rate": 6.73469387755102e-05, - "loss": 0.9307, + "epoch": 11.75, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.4016, "step": 1040 }, { - "epoch": 21.06, - "learning_rate": 6.721938775510204e-05, - "loss": 0.9151, + "epoch": 11.76, + "learning_rate": 5.2130681818181826e-05, + "loss": 0.3892, "step": 1041 }, { - "epoch": 21.08, - "learning_rate": 6.709183673469389e-05, - "loss": 0.9474, + "epoch": 11.77, + "learning_rate": 5.1988636363636364e-05, + "loss": 0.3974, "step": 1042 }, { - "epoch": 21.1, - "learning_rate": 6.696428571428572e-05, - "loss": 0.9697, + "epoch": 11.78, + "learning_rate": 5.184659090909091e-05, + "loss": 0.4024, "step": 1043 }, { - "epoch": 21.12, - "learning_rate": 6.683673469387756e-05, - "loss": 0.9423, + "epoch": 11.79, + "learning_rate": 5.170454545454546e-05, + "loss": 0.3889, "step": 1044 }, { - "epoch": 21.14, - "learning_rate": 6.670918367346939e-05, - "loss": 0.9797, + "epoch": 11.81, + "learning_rate": 5.15625e-05, + "loss": 0.4097, "step": 1045 }, { - "epoch": 21.16, - "learning_rate": 6.658163265306124e-05, - "loss": 0.919, + "epoch": 11.82, + "learning_rate": 5.1420454545454545e-05, + "loss": 0.4047, "step": 1046 }, { - "epoch": 21.18, - "learning_rate": 6.645408163265307e-05, - "loss": 0.9743, + "epoch": 11.83, + "learning_rate": 5.12784090909091e-05, + "loss": 0.4033, "step": 1047 }, { - "epoch": 21.21, - "learning_rate": 6.63265306122449e-05, - "loss": 0.9575, + "epoch": 11.84, + "learning_rate": 5.1136363636363635e-05, + "loss": 0.3774, "step": 1048 }, { - "epoch": 21.23, - "learning_rate": 6.619897959183674e-05, - "loss": 0.9861, + "epoch": 11.85, + "learning_rate": 5.099431818181818e-05, + "loss": 0.405, "step": 1049 }, { - "epoch": 21.25, - "learning_rate": 6.607142857142857e-05, - "loss": 0.9103, + "epoch": 11.86, + "learning_rate": 5.085227272727273e-05, + "loss": 0.3996, "step": 1050 }, { - "epoch": 21.27, - "learning_rate": 6.594387755102042e-05, - "loss": 0.993, + "epoch": 11.87, + "learning_rate": 5.071022727272727e-05, + "loss": 0.3885, "step": 1051 }, { - "epoch": 21.29, - "learning_rate": 6.581632653061225e-05, - "loss": 0.9668, + "epoch": 11.88, + "learning_rate": 5.056818181818183e-05, + "loss": 0.3914, "step": 1052 }, { - "epoch": 21.31, - "learning_rate": 6.568877551020408e-05, - "loss": 1.0008, + "epoch": 11.9, + "learning_rate": 5.042613636363637e-05, + "loss": 0.3908, "step": 1053 }, { - "epoch": 21.33, - "learning_rate": 6.556122448979592e-05, - "loss": 0.9825, + "epoch": 11.91, + "learning_rate": 5.0284090909090905e-05, + "loss": 0.3921, "step": 1054 }, { - "epoch": 21.35, - "learning_rate": 6.543367346938776e-05, - "loss": 1.0174, + "epoch": 11.92, + "learning_rate": 5.0142045454545464e-05, + "loss": 0.4077, "step": 1055 }, { - "epoch": 21.37, - "learning_rate": 6.530612244897959e-05, - "loss": 0.9685, + "epoch": 11.93, + "learning_rate": 5e-05, + "loss": 0.3973, "step": 1056 }, { - "epoch": 21.39, - "learning_rate": 6.517857142857143e-05, - "loss": 0.9265, + "epoch": 11.94, + "learning_rate": 4.985795454545455e-05, + "loss": 0.3986, "step": 1057 }, { - "epoch": 21.41, - "learning_rate": 6.505102040816326e-05, - "loss": 0.9495, + "epoch": 11.95, + "learning_rate": 4.971590909090909e-05, + "loss": 0.3938, "step": 1058 }, { - "epoch": 21.43, - "learning_rate": 6.49234693877551e-05, - "loss": 0.9541, + "epoch": 11.96, + "learning_rate": 4.957386363636364e-05, + "loss": 0.3897, "step": 1059 }, { - "epoch": 21.45, - "learning_rate": 6.479591836734694e-05, - "loss": 0.9299, + "epoch": 11.98, + "learning_rate": 4.943181818181818e-05, + "loss": 0.3965, "step": 1060 }, { - "epoch": 21.47, - "learning_rate": 6.466836734693877e-05, - "loss": 0.9625, + "epoch": 11.99, + "learning_rate": 4.9289772727272735e-05, + "loss": 0.3999, "step": 1061 }, { - "epoch": 21.49, - "learning_rate": 6.454081632653061e-05, - "loss": 1.0054, + "epoch": 12.0, + "learning_rate": 4.914772727272727e-05, + "loss": 0.3814, "step": 1062 }, { - "epoch": 21.51, - "learning_rate": 6.441326530612244e-05, - "loss": 0.9893, + "epoch": 12.01, + "learning_rate": 4.900568181818182e-05, + "loss": 0.3879, "step": 1063 }, { - "epoch": 21.53, - "learning_rate": 6.428571428571429e-05, - "loss": 0.9906, + "epoch": 12.02, + "learning_rate": 4.886363636363637e-05, + "loss": 0.3768, "step": 1064 }, { - "epoch": 21.55, - "learning_rate": 6.415816326530613e-05, - "loss": 0.9487, + "epoch": 12.03, + "learning_rate": 4.8721590909090915e-05, + "loss": 0.3813, "step": 1065 }, { - "epoch": 21.57, - "learning_rate": 6.403061224489796e-05, - "loss": 0.9728, + "epoch": 12.04, + "learning_rate": 4.857954545454545e-05, + "loss": 0.3759, "step": 1066 }, { - "epoch": 21.59, - "learning_rate": 6.390306122448981e-05, - "loss": 0.9883, + "epoch": 12.05, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.3817, "step": 1067 }, { - "epoch": 21.61, - "learning_rate": 6.377551020408164e-05, - "loss": 1.053, + "epoch": 12.07, + "learning_rate": 4.829545454545455e-05, + "loss": 0.3773, "step": 1068 }, { - "epoch": 21.63, - "learning_rate": 6.364795918367348e-05, - "loss": 1.012, + "epoch": 12.08, + "learning_rate": 4.815340909090909e-05, + "loss": 0.3807, "step": 1069 }, { - "epoch": 21.65, - "learning_rate": 6.352040816326531e-05, - "loss": 0.962, + "epoch": 12.09, + "learning_rate": 4.801136363636364e-05, + "loss": 0.3757, "step": 1070 }, { - "epoch": 21.67, - "learning_rate": 6.339285714285714e-05, - "loss": 0.9955, + "epoch": 12.1, + "learning_rate": 4.7869318181818185e-05, + "loss": 0.3819, "step": 1071 }, { - "epoch": 21.69, - "learning_rate": 6.326530612244899e-05, - "loss": 0.9908, + "epoch": 12.11, + "learning_rate": 4.772727272727273e-05, + "loss": 0.3731, "step": 1072 }, { - "epoch": 21.71, - "learning_rate": 6.313775510204082e-05, - "loss": 1.0327, + "epoch": 12.12, + "learning_rate": 4.7585227272727276e-05, + "loss": 0.3706, "step": 1073 }, { - "epoch": 21.73, - "learning_rate": 6.301020408163265e-05, - "loss": 0.9255, + "epoch": 12.13, + "learning_rate": 4.744318181818182e-05, + "loss": 0.3762, "step": 1074 }, { - "epoch": 21.75, - "learning_rate": 6.28826530612245e-05, - "loss": 0.9268, + "epoch": 12.14, + "learning_rate": 4.7301136363636366e-05, + "loss": 0.3749, "step": 1075 }, { - "epoch": 21.77, - "learning_rate": 6.275510204081633e-05, - "loss": 0.9204, + "epoch": 12.16, + "learning_rate": 4.715909090909091e-05, + "loss": 0.3884, "step": 1076 }, { - "epoch": 21.79, - "learning_rate": 6.262755102040817e-05, - "loss": 0.9838, + "epoch": 12.17, + "learning_rate": 4.7017045454545456e-05, + "loss": 0.373, "step": 1077 }, { - "epoch": 21.81, - "learning_rate": 6.25e-05, - "loss": 0.954, + "epoch": 12.18, + "learning_rate": 4.6875e-05, + "loss": 0.374, "step": 1078 }, { - "epoch": 21.83, - "learning_rate": 6.237244897959183e-05, - "loss": 1.0102, + "epoch": 12.19, + "learning_rate": 4.6732954545454546e-05, + "loss": 0.3813, "step": 1079 }, { - "epoch": 21.85, - "learning_rate": 6.224489795918368e-05, - "loss": 0.916, + "epoch": 12.2, + "learning_rate": 4.659090909090909e-05, + "loss": 0.3745, "step": 1080 }, { - "epoch": 21.87, - "learning_rate": 6.211734693877551e-05, - "loss": 0.9939, + "epoch": 12.21, + "learning_rate": 4.6448863636363636e-05, + "loss": 0.3646, "step": 1081 }, { - "epoch": 21.89, - "learning_rate": 6.198979591836735e-05, - "loss": 0.9675, + "epoch": 12.22, + "learning_rate": 4.630681818181818e-05, + "loss": 0.3729, "step": 1082 }, { - "epoch": 21.91, - "learning_rate": 6.186224489795918e-05, - "loss": 0.9666, + "epoch": 12.24, + "learning_rate": 4.616477272727273e-05, + "loss": 0.3701, "step": 1083 }, { - "epoch": 21.93, - "learning_rate": 6.173469387755101e-05, - "loss": 0.9919, + "epoch": 12.25, + "learning_rate": 4.602272727272727e-05, + "loss": 0.3655, "step": 1084 }, { - "epoch": 21.95, - "learning_rate": 6.160714285714286e-05, - "loss": 1.0106, + "epoch": 12.26, + "learning_rate": 4.5880681818181817e-05, + "loss": 0.3758, "step": 1085 }, { - "epoch": 21.97, - "learning_rate": 6.14795918367347e-05, - "loss": 0.9982, + "epoch": 12.27, + "learning_rate": 4.573863636363637e-05, + "loss": 0.3682, "step": 1086 }, { - "epoch": 21.99, - "learning_rate": 6.135204081632653e-05, - "loss": 1.0137, + "epoch": 12.28, + "learning_rate": 4.5596590909090913e-05, + "loss": 0.3865, "step": 1087 }, { - "epoch": 22.01, - "learning_rate": 6.122448979591838e-05, - "loss": 0.9331, + "epoch": 12.29, + "learning_rate": 4.545454545454546e-05, + "loss": 0.363, "step": 1088 }, { - "epoch": 22.03, - "learning_rate": 6.109693877551021e-05, - "loss": 0.8834, + "epoch": 12.3, + "learning_rate": 4.5312500000000004e-05, + "loss": 0.3727, "step": 1089 }, { - "epoch": 22.06, - "learning_rate": 6.0969387755102046e-05, - "loss": 0.9757, + "epoch": 12.31, + "learning_rate": 4.517045454545455e-05, + "loss": 0.3827, "step": 1090 }, { - "epoch": 22.08, - "learning_rate": 6.084183673469388e-05, - "loss": 0.9038, + "epoch": 12.33, + "learning_rate": 4.5028409090909094e-05, + "loss": 0.3658, "step": 1091 }, { - "epoch": 22.1, - "learning_rate": 6.0714285714285715e-05, - "loss": 0.9097, + "epoch": 12.34, + "learning_rate": 4.488636363636364e-05, + "loss": 0.3844, "step": 1092 }, { - "epoch": 22.12, - "learning_rate": 6.058673469387756e-05, - "loss": 0.8972, + "epoch": 12.35, + "learning_rate": 4.4744318181818184e-05, + "loss": 0.3731, "step": 1093 }, { - "epoch": 22.14, - "learning_rate": 6.045918367346939e-05, - "loss": 0.8825, + "epoch": 12.36, + "learning_rate": 4.460227272727273e-05, + "loss": 0.3767, "step": 1094 }, { - "epoch": 22.16, - "learning_rate": 6.0331632653061234e-05, - "loss": 0.9814, + "epoch": 12.37, + "learning_rate": 4.4460227272727274e-05, + "loss": 0.3751, "step": 1095 }, { - "epoch": 22.18, - "learning_rate": 6.0204081632653065e-05, - "loss": 0.9874, + "epoch": 12.38, + "learning_rate": 4.431818181818182e-05, + "loss": 0.3718, "step": 1096 }, { - "epoch": 22.2, - "learning_rate": 6.0076530612244896e-05, - "loss": 0.912, + "epoch": 12.39, + "learning_rate": 4.4176136363636364e-05, + "loss": 0.3833, "step": 1097 }, { - "epoch": 22.22, - "learning_rate": 5.994897959183674e-05, - "loss": 0.9206, + "epoch": 12.4, + "learning_rate": 4.4034090909090916e-05, + "loss": 0.3673, "step": 1098 }, { - "epoch": 22.24, - "learning_rate": 5.982142857142857e-05, - "loss": 0.9497, + "epoch": 12.42, + "learning_rate": 4.3892045454545454e-05, + "loss": 0.3799, "step": 1099 }, { - "epoch": 22.26, - "learning_rate": 5.9693877551020416e-05, - "loss": 0.9269, + "epoch": 12.43, + "learning_rate": 4.375e-05, + "loss": 0.3661, "step": 1100 }, { - "epoch": 22.28, - "learning_rate": 5.956632653061225e-05, - "loss": 0.9452, + "epoch": 12.44, + "learning_rate": 4.360795454545455e-05, + "loss": 0.3554, "step": 1101 }, { - "epoch": 22.3, - "learning_rate": 5.9438775510204084e-05, - "loss": 0.9548, + "epoch": 12.45, + "learning_rate": 4.346590909090909e-05, + "loss": 0.3787, "step": 1102 }, { - "epoch": 22.32, - "learning_rate": 5.931122448979592e-05, - "loss": 0.9689, + "epoch": 12.46, + "learning_rate": 4.3323863636363635e-05, + "loss": 0.3695, "step": 1103 }, { - "epoch": 22.34, - "learning_rate": 5.918367346938776e-05, - "loss": 0.9455, + "epoch": 12.47, + "learning_rate": 4.318181818181819e-05, + "loss": 0.3777, "step": 1104 }, { - "epoch": 22.36, - "learning_rate": 5.905612244897959e-05, - "loss": 0.9409, + "epoch": 12.48, + "learning_rate": 4.303977272727273e-05, + "loss": 0.3693, "step": 1105 }, { - "epoch": 22.38, - "learning_rate": 5.8928571428571435e-05, - "loss": 0.9093, + "epoch": 12.49, + "learning_rate": 4.289772727272727e-05, + "loss": 0.3731, "step": 1106 }, { - "epoch": 22.4, - "learning_rate": 5.8801020408163266e-05, - "loss": 0.921, + "epoch": 12.51, + "learning_rate": 4.275568181818182e-05, + "loss": 0.3659, "step": 1107 }, { - "epoch": 22.42, - "learning_rate": 5.867346938775511e-05, - "loss": 0.9368, + "epoch": 12.52, + "learning_rate": 4.261363636363637e-05, + "loss": 0.3689, "step": 1108 }, { - "epoch": 22.44, - "learning_rate": 5.854591836734694e-05, - "loss": 0.907, + "epoch": 12.53, + "learning_rate": 4.247159090909091e-05, + "loss": 0.3625, "step": 1109 }, { - "epoch": 22.46, - "learning_rate": 5.841836734693877e-05, - "loss": 0.9126, + "epoch": 12.54, + "learning_rate": 4.232954545454546e-05, + "loss": 0.3874, "step": 1110 }, { - "epoch": 22.48, - "learning_rate": 5.8290816326530616e-05, - "loss": 0.9161, + "epoch": 12.55, + "learning_rate": 4.21875e-05, + "loss": 0.3651, "step": 1111 }, { - "epoch": 22.5, - "learning_rate": 5.816326530612245e-05, - "loss": 0.9542, + "epoch": 12.56, + "learning_rate": 4.204545454545455e-05, + "loss": 0.3639, "step": 1112 }, { - "epoch": 22.52, - "learning_rate": 5.803571428571429e-05, - "loss": 0.9775, + "epoch": 12.57, + "learning_rate": 4.190340909090909e-05, + "loss": 0.378, "step": 1113 }, { - "epoch": 22.54, - "learning_rate": 5.790816326530612e-05, - "loss": 1.0006, + "epoch": 12.59, + "learning_rate": 4.176136363636364e-05, + "loss": 0.3726, "step": 1114 }, { - "epoch": 22.56, - "learning_rate": 5.778061224489796e-05, - "loss": 0.8965, + "epoch": 12.6, + "learning_rate": 4.161931818181818e-05, + "loss": 0.3732, "step": 1115 }, { - "epoch": 22.58, - "learning_rate": 5.7653061224489805e-05, - "loss": 0.944, + "epoch": 12.61, + "learning_rate": 4.1477272727272734e-05, + "loss": 0.3673, "step": 1116 }, { - "epoch": 22.6, - "learning_rate": 5.7525510204081636e-05, - "loss": 0.9162, + "epoch": 12.62, + "learning_rate": 4.133522727272727e-05, + "loss": 0.3566, "step": 1117 }, { - "epoch": 22.62, - "learning_rate": 5.739795918367348e-05, - "loss": 0.9325, + "epoch": 12.63, + "learning_rate": 4.119318181818182e-05, + "loss": 0.3757, "step": 1118 }, { - "epoch": 22.64, - "learning_rate": 5.727040816326531e-05, - "loss": 0.8998, + "epoch": 12.64, + "learning_rate": 4.105113636363637e-05, + "loss": 0.3739, "step": 1119 }, { - "epoch": 22.66, - "learning_rate": 5.714285714285714e-05, - "loss": 0.9362, + "epoch": 12.65, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.3768, "step": 1120 }, { - "epoch": 22.68, - "learning_rate": 5.7015306122448986e-05, - "loss": 0.9969, + "epoch": 12.66, + "learning_rate": 4.076704545454545e-05, + "loss": 0.3758, "step": 1121 }, { - "epoch": 22.7, - "learning_rate": 5.688775510204082e-05, - "loss": 0.9104, + "epoch": 12.68, + "learning_rate": 4.0625000000000005e-05, + "loss": 0.3655, "step": 1122 }, { - "epoch": 22.72, - "learning_rate": 5.676020408163265e-05, - "loss": 0.9746, + "epoch": 12.69, + "learning_rate": 4.048295454545455e-05, + "loss": 0.3673, "step": 1123 }, { - "epoch": 22.74, - "learning_rate": 5.663265306122449e-05, - "loss": 0.9821, + "epoch": 12.7, + "learning_rate": 4.034090909090909e-05, + "loss": 0.3683, "step": 1124 }, { - "epoch": 22.76, - "learning_rate": 5.650510204081633e-05, - "loss": 0.9526, + "epoch": 12.71, + "learning_rate": 4.019886363636364e-05, + "loss": 0.3569, "step": 1125 }, { - "epoch": 22.78, - "learning_rate": 5.637755102040817e-05, - "loss": 0.871, + "epoch": 12.72, + "learning_rate": 4.0056818181818185e-05, + "loss": 0.3741, "step": 1126 }, { - "epoch": 22.8, - "learning_rate": 5.6250000000000005e-05, - "loss": 0.9534, + "epoch": 12.73, + "learning_rate": 3.991477272727273e-05, + "loss": 0.3817, "step": 1127 }, { - "epoch": 22.82, - "learning_rate": 5.6122448979591836e-05, - "loss": 0.9616, + "epoch": 12.74, + "learning_rate": 3.9772727272727275e-05, + "loss": 0.3748, "step": 1128 }, { - "epoch": 22.84, - "learning_rate": 5.599489795918368e-05, - "loss": 0.9627, + "epoch": 12.75, + "learning_rate": 3.963068181818182e-05, + "loss": 0.3625, "step": 1129 }, { - "epoch": 22.86, - "learning_rate": 5.586734693877551e-05, - "loss": 0.9704, + "epoch": 12.77, + "learning_rate": 3.9488636363636366e-05, + "loss": 0.3656, "step": 1130 }, { - "epoch": 22.88, - "learning_rate": 5.5739795918367356e-05, - "loss": 0.9506, + "epoch": 12.78, + "learning_rate": 3.934659090909091e-05, + "loss": 0.3564, "step": 1131 }, { - "epoch": 22.9, - "learning_rate": 5.561224489795919e-05, - "loss": 0.9553, + "epoch": 12.79, + "learning_rate": 3.9204545454545456e-05, + "loss": 0.3737, "step": 1132 }, { - "epoch": 22.93, - "learning_rate": 5.548469387755102e-05, - "loss": 0.9294, + "epoch": 12.8, + "learning_rate": 3.90625e-05, + "loss": 0.3649, "step": 1133 }, { - "epoch": 22.95, - "learning_rate": 5.535714285714286e-05, - "loss": 0.8979, + "epoch": 12.81, + "learning_rate": 3.8920454545454546e-05, + "loss": 0.3728, "step": 1134 }, { - "epoch": 22.97, - "learning_rate": 5.522959183673469e-05, - "loss": 1.0004, + "epoch": 12.82, + "learning_rate": 3.877840909090909e-05, + "loss": 0.3865, "step": 1135 }, { - "epoch": 22.99, - "learning_rate": 5.510204081632653e-05, - "loss": 0.9821, + "epoch": 12.83, + "learning_rate": 3.8636363636363636e-05, + "loss": 0.3866, "step": 1136 }, { - "epoch": 23.01, - "learning_rate": 5.497448979591837e-05, - "loss": 0.9607, + "epoch": 12.85, + "learning_rate": 3.849431818181818e-05, + "loss": 0.3725, "step": 1137 }, { - "epoch": 23.03, - "learning_rate": 5.4846938775510206e-05, - "loss": 0.9757, + "epoch": 12.86, + "learning_rate": 3.835227272727273e-05, + "loss": 0.3662, "step": 1138 }, { - "epoch": 23.05, - "learning_rate": 5.471938775510205e-05, - "loss": 0.9096, + "epoch": 12.87, + "learning_rate": 3.821022727272727e-05, + "loss": 0.3742, "step": 1139 }, { - "epoch": 23.07, - "learning_rate": 5.459183673469388e-05, - "loss": 0.9144, + "epoch": 12.88, + "learning_rate": 3.8068181818181816e-05, + "loss": 0.3727, "step": 1140 }, { - "epoch": 23.09, - "learning_rate": 5.446428571428571e-05, - "loss": 0.8667, + "epoch": 12.89, + "learning_rate": 3.792613636363637e-05, + "loss": 0.3653, "step": 1141 }, { - "epoch": 23.11, - "learning_rate": 5.4336734693877556e-05, - "loss": 0.8993, + "epoch": 12.9, + "learning_rate": 3.778409090909091e-05, + "loss": 0.3631, "step": 1142 }, { - "epoch": 23.13, - "learning_rate": 5.420918367346939e-05, - "loss": 0.8964, + "epoch": 12.91, + "learning_rate": 3.764204545454545e-05, + "loss": 0.3674, "step": 1143 }, { - "epoch": 23.15, - "learning_rate": 5.408163265306123e-05, - "loss": 0.9173, + "epoch": 12.92, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.3598, "step": 1144 }, { - "epoch": 23.17, - "learning_rate": 5.395408163265306e-05, - "loss": 0.9019, + "epoch": 12.94, + "learning_rate": 3.735795454545455e-05, + "loss": 0.3697, "step": 1145 }, { - "epoch": 23.19, - "learning_rate": 5.382653061224489e-05, - "loss": 0.9303, + "epoch": 12.95, + "learning_rate": 3.721590909090909e-05, + "loss": 0.3639, "step": 1146 }, { - "epoch": 23.21, - "learning_rate": 5.369897959183674e-05, - "loss": 0.9268, + "epoch": 12.96, + "learning_rate": 3.707386363636364e-05, + "loss": 0.3597, "step": 1147 }, { - "epoch": 23.23, - "learning_rate": 5.3571428571428575e-05, - "loss": 0.8803, + "epoch": 12.97, + "learning_rate": 3.6931818181818184e-05, + "loss": 0.3815, "step": 1148 }, { - "epoch": 23.25, - "learning_rate": 5.344387755102041e-05, - "loss": 0.9197, + "epoch": 12.98, + "learning_rate": 3.678977272727273e-05, + "loss": 0.3477, "step": 1149 }, { - "epoch": 23.27, - "learning_rate": 5.331632653061225e-05, - "loss": 0.9204, + "epoch": 12.99, + "learning_rate": 3.6647727272727274e-05, + "loss": 0.3631, "step": 1150 }, { - "epoch": 23.29, - "learning_rate": 5.318877551020408e-05, - "loss": 0.8802, + "epoch": 13.0, + "learning_rate": 3.650568181818182e-05, + "loss": 0.3569, "step": 1151 }, { - "epoch": 23.31, - "learning_rate": 5.3061224489795926e-05, - "loss": 0.9044, + "epoch": 13.01, + "learning_rate": 3.6363636363636364e-05, + "loss": 0.3435, "step": 1152 }, { - "epoch": 23.33, - "learning_rate": 5.293367346938776e-05, - "loss": 0.8893, + "epoch": 13.03, + "learning_rate": 3.6221590909090916e-05, + "loss": 0.3504, "step": 1153 }, { - "epoch": 23.35, - "learning_rate": 5.280612244897959e-05, - "loss": 0.8928, + "epoch": 13.04, + "learning_rate": 3.6079545454545454e-05, + "loss": 0.3582, "step": 1154 }, { - "epoch": 23.37, - "learning_rate": 5.267857142857143e-05, - "loss": 0.9353, + "epoch": 13.05, + "learning_rate": 3.59375e-05, + "loss": 0.356, "step": 1155 }, { - "epoch": 23.39, - "learning_rate": 5.255102040816326e-05, - "loss": 0.9345, + "epoch": 13.06, + "learning_rate": 3.579545454545455e-05, + "loss": 0.3506, "step": 1156 }, { - "epoch": 23.41, - "learning_rate": 5.242346938775511e-05, - "loss": 0.9372, + "epoch": 13.07, + "learning_rate": 3.565340909090909e-05, + "loss": 0.3628, "step": 1157 }, { - "epoch": 23.43, - "learning_rate": 5.229591836734694e-05, - "loss": 0.9234, + "epoch": 13.08, + "learning_rate": 3.5511363636363635e-05, + "loss": 0.3494, "step": 1158 }, { - "epoch": 23.45, - "learning_rate": 5.2168367346938776e-05, - "loss": 0.9177, + "epoch": 13.09, + "learning_rate": 3.5369318181818186e-05, + "loss": 0.3653, "step": 1159 }, { - "epoch": 23.47, - "learning_rate": 5.2040816326530614e-05, - "loss": 0.8757, + "epoch": 13.11, + "learning_rate": 3.522727272727273e-05, + "loss": 0.3515, "step": 1160 }, { - "epoch": 23.49, - "learning_rate": 5.191326530612245e-05, - "loss": 0.9048, + "epoch": 13.12, + "learning_rate": 3.508522727272727e-05, + "loss": 0.3474, "step": 1161 }, { - "epoch": 23.51, - "learning_rate": 5.1785714285714296e-05, - "loss": 0.9248, + "epoch": 13.13, + "learning_rate": 3.494318181818182e-05, + "loss": 0.3469, "step": 1162 }, { - "epoch": 23.53, - "learning_rate": 5.1658163265306127e-05, - "loss": 0.9379, + "epoch": 13.14, + "learning_rate": 3.480113636363637e-05, + "loss": 0.3471, "step": 1163 }, { - "epoch": 23.55, - "learning_rate": 5.153061224489796e-05, - "loss": 0.8596, + "epoch": 13.15, + "learning_rate": 3.465909090909091e-05, + "loss": 0.355, "step": 1164 }, { - "epoch": 23.57, - "learning_rate": 5.14030612244898e-05, - "loss": 0.9751, + "epoch": 13.16, + "learning_rate": 3.451704545454546e-05, + "loss": 0.3532, "step": 1165 }, { - "epoch": 23.59, - "learning_rate": 5.127551020408163e-05, - "loss": 0.8842, + "epoch": 13.17, + "learning_rate": 3.4375e-05, + "loss": 0.3533, "step": 1166 }, { - "epoch": 23.61, - "learning_rate": 5.114795918367348e-05, - "loss": 0.8765, + "epoch": 13.18, + "learning_rate": 3.423295454545455e-05, + "loss": 0.3571, "step": 1167 }, { - "epoch": 23.63, - "learning_rate": 5.102040816326531e-05, - "loss": 0.8942, + "epoch": 13.2, + "learning_rate": 3.409090909090909e-05, + "loss": 0.3435, "step": 1168 }, { - "epoch": 23.65, - "learning_rate": 5.089285714285714e-05, - "loss": 0.938, + "epoch": 13.21, + "learning_rate": 3.394886363636364e-05, + "loss": 0.348, "step": 1169 }, { - "epoch": 23.67, - "learning_rate": 5.076530612244898e-05, - "loss": 0.8993, + "epoch": 13.22, + "learning_rate": 3.380681818181818e-05, + "loss": 0.3505, "step": 1170 }, { - "epoch": 23.69, - "learning_rate": 5.063775510204082e-05, - "loss": 0.9362, + "epoch": 13.23, + "learning_rate": 3.3664772727272734e-05, + "loss": 0.346, "step": 1171 }, { - "epoch": 23.71, - "learning_rate": 5.051020408163265e-05, - "loss": 0.9249, + "epoch": 13.24, + "learning_rate": 3.352272727272727e-05, + "loss": 0.3568, "step": 1172 }, { - "epoch": 23.73, - "learning_rate": 5.0382653061224496e-05, - "loss": 0.9055, + "epoch": 13.25, + "learning_rate": 3.338068181818182e-05, + "loss": 0.3548, "step": 1173 }, { - "epoch": 23.75, - "learning_rate": 5.025510204081633e-05, - "loss": 0.8967, + "epoch": 13.26, + "learning_rate": 3.323863636363637e-05, + "loss": 0.352, "step": 1174 }, { - "epoch": 23.77, - "learning_rate": 5.012755102040817e-05, - "loss": 0.8795, + "epoch": 13.27, + "learning_rate": 3.3096590909090915e-05, + "loss": 0.3561, "step": 1175 }, { - "epoch": 23.8, - "learning_rate": 5e-05, - "loss": 0.9452, + "epoch": 13.29, + "learning_rate": 3.295454545454545e-05, + "loss": 0.3424, "step": 1176 }, { - "epoch": 23.82, - "learning_rate": 4.987244897959184e-05, - "loss": 0.926, + "epoch": 13.3, + "learning_rate": 3.2812500000000005e-05, + "loss": 0.3453, "step": 1177 }, { - "epoch": 23.84, - "learning_rate": 4.974489795918368e-05, - "loss": 0.8948, + "epoch": 13.31, + "learning_rate": 3.267045454545455e-05, + "loss": 0.347, "step": 1178 }, { - "epoch": 23.86, - "learning_rate": 4.961734693877551e-05, - "loss": 0.8926, + "epoch": 13.32, + "learning_rate": 3.252840909090909e-05, + "loss": 0.3526, "step": 1179 }, { - "epoch": 23.88, - "learning_rate": 4.9489795918367346e-05, - "loss": 0.8949, + "epoch": 13.33, + "learning_rate": 3.238636363636364e-05, + "loss": 0.3634, "step": 1180 }, { - "epoch": 23.9, - "learning_rate": 4.9362244897959184e-05, - "loss": 0.9648, + "epoch": 13.34, + "learning_rate": 3.2244318181818185e-05, + "loss": 0.3591, "step": 1181 }, { - "epoch": 23.92, - "learning_rate": 4.923469387755102e-05, - "loss": 0.9599, + "epoch": 13.35, + "learning_rate": 3.210227272727273e-05, + "loss": 0.3449, "step": 1182 }, { - "epoch": 23.94, - "learning_rate": 4.910714285714286e-05, - "loss": 0.9603, + "epoch": 13.36, + "learning_rate": 3.1960227272727275e-05, + "loss": 0.3362, "step": 1183 }, { - "epoch": 23.96, - "learning_rate": 4.89795918367347e-05, - "loss": 0.9302, + "epoch": 13.38, + "learning_rate": 3.181818181818182e-05, + "loss": 0.3613, "step": 1184 }, { - "epoch": 23.98, - "learning_rate": 4.8852040816326534e-05, - "loss": 0.9261, + "epoch": 13.39, + "learning_rate": 3.1676136363636365e-05, + "loss": 0.3509, "step": 1185 }, { - "epoch": 24.0, - "learning_rate": 4.872448979591837e-05, - "loss": 0.9257, + "epoch": 13.4, + "learning_rate": 3.153409090909091e-05, + "loss": 0.3534, "step": 1186 }, { - "epoch": 24.02, - "learning_rate": 4.859693877551021e-05, - "loss": 0.8725, + "epoch": 13.41, + "learning_rate": 3.1392045454545456e-05, + "loss": 0.3452, "step": 1187 }, { - "epoch": 24.04, - "learning_rate": 4.846938775510204e-05, - "loss": 0.8486, + "epoch": 13.42, + "learning_rate": 3.125e-05, + "loss": 0.3659, "step": 1188 }, { - "epoch": 24.06, - "learning_rate": 4.834183673469388e-05, - "loss": 0.8457, + "epoch": 13.43, + "learning_rate": 3.1107954545454546e-05, + "loss": 0.3445, "step": 1189 }, { - "epoch": 24.08, - "learning_rate": 4.8214285714285716e-05, - "loss": 0.7848, + "epoch": 13.44, + "learning_rate": 3.096590909090909e-05, + "loss": 0.3536, "step": 1190 }, { - "epoch": 24.1, - "learning_rate": 4.8086734693877554e-05, - "loss": 0.8885, + "epoch": 13.46, + "learning_rate": 3.0823863636363636e-05, + "loss": 0.3588, "step": 1191 }, { - "epoch": 24.12, - "learning_rate": 4.795918367346939e-05, - "loss": 0.9099, + "epoch": 13.47, + "learning_rate": 3.068181818181818e-05, + "loss": 0.347, "step": 1192 }, { - "epoch": 24.14, - "learning_rate": 4.783163265306123e-05, - "loss": 0.9147, + "epoch": 13.48, + "learning_rate": 3.053977272727273e-05, + "loss": 0.3537, "step": 1193 }, { - "epoch": 24.16, - "learning_rate": 4.7704081632653066e-05, - "loss": 0.8781, + "epoch": 13.49, + "learning_rate": 3.039772727272727e-05, + "loss": 0.3516, "step": 1194 }, { - "epoch": 24.18, - "learning_rate": 4.7576530612244904e-05, - "loss": 0.8847, + "epoch": 13.5, + "learning_rate": 3.025568181818182e-05, + "loss": 0.3587, "step": 1195 }, { - "epoch": 24.2, - "learning_rate": 4.744897959183674e-05, - "loss": 0.9041, + "epoch": 13.51, + "learning_rate": 3.0113636363636365e-05, + "loss": 0.3494, "step": 1196 }, { - "epoch": 24.22, - "learning_rate": 4.732142857142857e-05, - "loss": 0.8639, + "epoch": 13.52, + "learning_rate": 2.9971590909090913e-05, + "loss": 0.3341, "step": 1197 }, { - "epoch": 24.24, - "learning_rate": 4.719387755102041e-05, - "loss": 0.8831, + "epoch": 13.53, + "learning_rate": 2.9829545454545455e-05, + "loss": 0.3563, "step": 1198 }, { - "epoch": 24.26, - "learning_rate": 4.706632653061225e-05, - "loss": 0.9063, + "epoch": 13.55, + "learning_rate": 2.96875e-05, + "loss": 0.3534, "step": 1199 }, { - "epoch": 24.28, - "learning_rate": 4.6938775510204086e-05, - "loss": 0.8753, + "epoch": 13.56, + "learning_rate": 2.954545454545455e-05, + "loss": 0.3632, "step": 1200 }, { - "epoch": 24.3, - "learning_rate": 4.6811224489795916e-05, - "loss": 0.8977, + "epoch": 13.57, + "learning_rate": 2.940340909090909e-05, + "loss": 0.3523, "step": 1201 }, { - "epoch": 24.32, - "learning_rate": 4.6683673469387754e-05, - "loss": 0.8729, + "epoch": 13.58, + "learning_rate": 2.9261363636363635e-05, + "loss": 0.3587, "step": 1202 }, { - "epoch": 24.34, - "learning_rate": 4.655612244897959e-05, - "loss": 0.898, + "epoch": 13.59, + "learning_rate": 2.9119318181818184e-05, + "loss": 0.3523, "step": 1203 }, { - "epoch": 24.36, - "learning_rate": 4.642857142857143e-05, - "loss": 0.8521, + "epoch": 13.6, + "learning_rate": 2.8977272727272732e-05, + "loss": 0.3418, "step": 1204 }, { - "epoch": 24.38, - "learning_rate": 4.630102040816327e-05, - "loss": 0.8563, + "epoch": 13.61, + "learning_rate": 2.883522727272727e-05, + "loss": 0.3515, "step": 1205 }, { - "epoch": 24.4, - "learning_rate": 4.6173469387755105e-05, - "loss": 0.8462, + "epoch": 13.62, + "learning_rate": 2.869318181818182e-05, + "loss": 0.3362, "step": 1206 }, { - "epoch": 24.42, - "learning_rate": 4.604591836734694e-05, - "loss": 0.8929, + "epoch": 13.64, + "learning_rate": 2.8551136363636367e-05, + "loss": 0.3393, "step": 1207 }, { - "epoch": 24.44, - "learning_rate": 4.591836734693878e-05, - "loss": 0.9154, + "epoch": 13.65, + "learning_rate": 2.8409090909090912e-05, + "loss": 0.3395, "step": 1208 }, { - "epoch": 24.46, - "learning_rate": 4.579081632653062e-05, - "loss": 0.811, + "epoch": 13.66, + "learning_rate": 2.8267045454545454e-05, + "loss": 0.3434, "step": 1209 }, { - "epoch": 24.48, - "learning_rate": 4.566326530612245e-05, - "loss": 0.8667, + "epoch": 13.67, + "learning_rate": 2.8125000000000003e-05, + "loss": 0.3536, "step": 1210 }, { - "epoch": 24.5, - "learning_rate": 4.5535714285714286e-05, - "loss": 0.9179, + "epoch": 13.68, + "learning_rate": 2.7982954545454548e-05, + "loss": 0.3482, "step": 1211 }, { - "epoch": 24.52, - "learning_rate": 4.5408163265306124e-05, - "loss": 0.8757, + "epoch": 13.69, + "learning_rate": 2.784090909090909e-05, + "loss": 0.3383, "step": 1212 }, { - "epoch": 24.54, - "learning_rate": 4.528061224489796e-05, - "loss": 0.8519, + "epoch": 13.7, + "learning_rate": 2.7698863636363638e-05, + "loss": 0.3582, "step": 1213 }, { - "epoch": 24.56, - "learning_rate": 4.515306122448979e-05, - "loss": 0.9335, + "epoch": 13.72, + "learning_rate": 2.7556818181818183e-05, + "loss": 0.3469, "step": 1214 }, { - "epoch": 24.58, - "learning_rate": 4.502551020408164e-05, - "loss": 0.8785, + "epoch": 13.73, + "learning_rate": 2.741477272727273e-05, + "loss": 0.351, "step": 1215 }, { - "epoch": 24.6, - "learning_rate": 4.4897959183673474e-05, - "loss": 0.9022, + "epoch": 13.74, + "learning_rate": 2.7272727272727273e-05, + "loss": 0.3461, "step": 1216 }, { - "epoch": 24.62, - "learning_rate": 4.477040816326531e-05, - "loss": 0.9532, + "epoch": 13.75, + "learning_rate": 2.7130681818181818e-05, + "loss": 0.3497, "step": 1217 }, { - "epoch": 24.64, - "learning_rate": 4.464285714285715e-05, - "loss": 0.8956, + "epoch": 13.76, + "learning_rate": 2.6988636363636367e-05, + "loss": 0.3503, "step": 1218 }, { - "epoch": 24.67, - "learning_rate": 4.451530612244898e-05, - "loss": 0.8739, + "epoch": 13.77, + "learning_rate": 2.6846590909090912e-05, + "loss": 0.3499, "step": 1219 }, { - "epoch": 24.69, - "learning_rate": 4.438775510204082e-05, - "loss": 0.9312, + "epoch": 13.78, + "learning_rate": 2.6704545454545453e-05, + "loss": 0.3529, "step": 1220 }, { - "epoch": 24.71, - "learning_rate": 4.4260204081632656e-05, - "loss": 0.8536, + "epoch": 13.79, + "learning_rate": 2.6562500000000002e-05, + "loss": 0.3516, "step": 1221 }, { - "epoch": 24.73, - "learning_rate": 4.4132653061224493e-05, - "loss": 0.8984, + "epoch": 13.81, + "learning_rate": 2.6420454545454547e-05, + "loss": 0.3457, "step": 1222 }, { - "epoch": 24.75, - "learning_rate": 4.4005102040816324e-05, - "loss": 0.8949, + "epoch": 13.82, + "learning_rate": 2.627840909090909e-05, + "loss": 0.3482, "step": 1223 }, { - "epoch": 24.77, - "learning_rate": 4.387755102040816e-05, - "loss": 0.9389, + "epoch": 13.83, + "learning_rate": 2.6136363636363637e-05, + "loss": 0.3458, "step": 1224 }, { - "epoch": 24.79, - "learning_rate": 4.375e-05, - "loss": 0.8703, + "epoch": 13.84, + "learning_rate": 2.5994318181818182e-05, + "loss": 0.3442, "step": 1225 }, { - "epoch": 24.81, - "learning_rate": 4.362244897959184e-05, - "loss": 0.9407, + "epoch": 13.85, + "learning_rate": 2.585227272727273e-05, + "loss": 0.3717, "step": 1226 }, { - "epoch": 24.83, - "learning_rate": 4.3494897959183675e-05, - "loss": 0.9016, + "epoch": 13.86, + "learning_rate": 2.5710227272727272e-05, + "loss": 0.3404, "step": 1227 }, { - "epoch": 24.85, - "learning_rate": 4.336734693877551e-05, - "loss": 0.9025, + "epoch": 13.87, + "learning_rate": 2.5568181818181817e-05, + "loss": 0.34, "step": 1228 }, { - "epoch": 24.87, - "learning_rate": 4.323979591836735e-05, - "loss": 0.9415, + "epoch": 13.88, + "learning_rate": 2.5426136363636366e-05, + "loss": 0.3615, "step": 1229 }, { - "epoch": 24.89, - "learning_rate": 4.311224489795919e-05, - "loss": 0.9146, + "epoch": 13.9, + "learning_rate": 2.5284090909090914e-05, + "loss": 0.3473, "step": 1230 }, { - "epoch": 24.91, - "learning_rate": 4.2984693877551025e-05, - "loss": 0.9144, + "epoch": 13.91, + "learning_rate": 2.5142045454545453e-05, + "loss": 0.3532, "step": 1231 }, { - "epoch": 24.93, - "learning_rate": 4.2857142857142856e-05, - "loss": 0.9138, + "epoch": 13.92, + "learning_rate": 2.5e-05, + "loss": 0.3524, "step": 1232 }, { - "epoch": 24.95, - "learning_rate": 4.2729591836734694e-05, - "loss": 0.9372, + "epoch": 13.93, + "learning_rate": 2.4857954545454546e-05, + "loss": 0.3681, "step": 1233 }, { - "epoch": 24.97, - "learning_rate": 4.260204081632653e-05, - "loss": 0.8701, + "epoch": 13.94, + "learning_rate": 2.471590909090909e-05, + "loss": 0.3432, "step": 1234 }, { - "epoch": 24.99, - "learning_rate": 4.247448979591837e-05, - "loss": 0.9278, + "epoch": 13.95, + "learning_rate": 2.4573863636363636e-05, + "loss": 0.3507, "step": 1235 }, { - "epoch": 25.01, - "learning_rate": 4.234693877551021e-05, - "loss": 0.9157, + "epoch": 13.96, + "learning_rate": 2.4431818181818185e-05, + "loss": 0.3448, "step": 1236 }, { - "epoch": 25.03, - "learning_rate": 4.2219387755102045e-05, - "loss": 0.8852, + "epoch": 13.97, + "learning_rate": 2.4289772727272727e-05, + "loss": 0.3501, "step": 1237 }, { - "epoch": 25.05, - "learning_rate": 4.209183673469388e-05, - "loss": 0.855, + "epoch": 13.99, + "learning_rate": 2.4147727272727275e-05, + "loss": 0.3624, "step": 1238 }, { - "epoch": 25.07, - "learning_rate": 4.196428571428572e-05, - "loss": 0.8547, + "epoch": 14.0, + "learning_rate": 2.400568181818182e-05, + "loss": 0.3446, "step": 1239 }, { - "epoch": 25.09, - "learning_rate": 4.183673469387756e-05, - "loss": 0.8691, + "epoch": 14.01, + "learning_rate": 2.3863636363636365e-05, + "loss": 0.3463, "step": 1240 }, { - "epoch": 25.11, - "learning_rate": 4.170918367346939e-05, - "loss": 0.9101, + "epoch": 14.02, + "learning_rate": 2.372159090909091e-05, + "loss": 0.3492, "step": 1241 }, { - "epoch": 25.13, - "learning_rate": 4.1581632653061226e-05, - "loss": 0.8408, + "epoch": 14.03, + "learning_rate": 2.3579545454545455e-05, + "loss": 0.3352, "step": 1242 }, { - "epoch": 25.15, - "learning_rate": 4.1454081632653064e-05, - "loss": 0.9008, + "epoch": 14.04, + "learning_rate": 2.34375e-05, + "loss": 0.3368, "step": 1243 }, { - "epoch": 25.17, - "learning_rate": 4.13265306122449e-05, - "loss": 0.859, + "epoch": 14.05, + "learning_rate": 2.3295454545454546e-05, + "loss": 0.3295, "step": 1244 }, { - "epoch": 25.19, - "learning_rate": 4.119897959183674e-05, - "loss": 0.8525, + "epoch": 14.07, + "learning_rate": 2.315340909090909e-05, + "loss": 0.3426, "step": 1245 }, { - "epoch": 25.21, - "learning_rate": 4.107142857142857e-05, - "loss": 0.8682, + "epoch": 14.08, + "learning_rate": 2.3011363636363636e-05, + "loss": 0.3284, "step": 1246 }, { - "epoch": 25.23, - "learning_rate": 4.094387755102041e-05, - "loss": 0.8426, + "epoch": 14.09, + "learning_rate": 2.2869318181818184e-05, + "loss": 0.3305, "step": 1247 }, { - "epoch": 25.25, - "learning_rate": 4.0816326530612245e-05, - "loss": 0.8948, + "epoch": 14.1, + "learning_rate": 2.272727272727273e-05, + "loss": 0.3447, "step": 1248 }, { - "epoch": 25.27, - "learning_rate": 4.068877551020408e-05, - "loss": 0.8333, + "epoch": 14.11, + "learning_rate": 2.2585227272727274e-05, + "loss": 0.344, "step": 1249 }, { - "epoch": 25.29, - "learning_rate": 4.056122448979592e-05, - "loss": 0.87, + "epoch": 14.12, + "learning_rate": 2.244318181818182e-05, + "loss": 0.3516, "step": 1250 }, { - "epoch": 25.31, - "learning_rate": 4.043367346938776e-05, - "loss": 0.8215, + "epoch": 14.13, + "learning_rate": 2.2301136363636365e-05, + "loss": 0.3417, "step": 1251 }, { - "epoch": 25.33, - "learning_rate": 4.0306122448979596e-05, - "loss": 0.862, + "epoch": 14.14, + "learning_rate": 2.215909090909091e-05, + "loss": 0.3353, "step": 1252 }, { - "epoch": 25.35, - "learning_rate": 4.017857142857143e-05, - "loss": 0.8607, + "epoch": 14.16, + "learning_rate": 2.2017045454545458e-05, + "loss": 0.3363, "step": 1253 }, { - "epoch": 25.37, - "learning_rate": 4.0051020408163264e-05, - "loss": 0.8218, + "epoch": 14.17, + "learning_rate": 2.1875e-05, + "loss": 0.3342, "step": 1254 }, { - "epoch": 25.39, - "learning_rate": 3.99234693877551e-05, - "loss": 0.806, + "epoch": 14.18, + "learning_rate": 2.1732954545454545e-05, + "loss": 0.3366, "step": 1255 }, { - "epoch": 25.41, - "learning_rate": 3.979591836734694e-05, - "loss": 0.8929, + "epoch": 14.19, + "learning_rate": 2.1590909090909093e-05, + "loss": 0.3333, "step": 1256 }, { - "epoch": 25.43, - "learning_rate": 3.966836734693878e-05, - "loss": 0.8551, + "epoch": 14.2, + "learning_rate": 2.1448863636363635e-05, + "loss": 0.3425, "step": 1257 }, { - "epoch": 25.45, - "learning_rate": 3.9540816326530615e-05, - "loss": 0.8408, + "epoch": 14.21, + "learning_rate": 2.1306818181818183e-05, + "loss": 0.3366, "step": 1258 }, { - "epoch": 25.47, - "learning_rate": 3.9413265306122446e-05, - "loss": 0.8819, + "epoch": 14.22, + "learning_rate": 2.116477272727273e-05, + "loss": 0.3324, "step": 1259 }, { - "epoch": 25.49, - "learning_rate": 3.928571428571429e-05, - "loss": 0.8757, + "epoch": 14.23, + "learning_rate": 2.1022727272727274e-05, + "loss": 0.3527, "step": 1260 }, { - "epoch": 25.52, - "learning_rate": 3.915816326530613e-05, - "loss": 0.8778, + "epoch": 14.25, + "learning_rate": 2.088068181818182e-05, + "loss": 0.3399, "step": 1261 }, { - "epoch": 25.54, - "learning_rate": 3.9030612244897965e-05, - "loss": 0.8524, + "epoch": 14.26, + "learning_rate": 2.0738636363636367e-05, + "loss": 0.3651, "step": 1262 }, { - "epoch": 25.56, - "learning_rate": 3.8903061224489796e-05, - "loss": 0.846, + "epoch": 14.27, + "learning_rate": 2.059659090909091e-05, + "loss": 0.3381, "step": 1263 }, { - "epoch": 25.58, - "learning_rate": 3.8775510204081634e-05, - "loss": 0.8757, + "epoch": 14.28, + "learning_rate": 2.0454545454545457e-05, + "loss": 0.3408, "step": 1264 }, { - "epoch": 25.6, - "learning_rate": 3.864795918367347e-05, - "loss": 0.9084, + "epoch": 14.29, + "learning_rate": 2.0312500000000002e-05, + "loss": 0.3237, "step": 1265 }, { - "epoch": 25.62, - "learning_rate": 3.852040816326531e-05, - "loss": 0.8826, + "epoch": 14.3, + "learning_rate": 2.0170454545454544e-05, + "loss": 0.3421, "step": 1266 }, { - "epoch": 25.64, - "learning_rate": 3.839285714285715e-05, - "loss": 0.8619, + "epoch": 14.31, + "learning_rate": 2.0028409090909093e-05, + "loss": 0.3361, "step": 1267 }, { - "epoch": 25.66, - "learning_rate": 3.826530612244898e-05, - "loss": 0.8942, + "epoch": 14.33, + "learning_rate": 1.9886363636363638e-05, + "loss": 0.3475, "step": 1268 }, { - "epoch": 25.68, - "learning_rate": 3.8137755102040815e-05, - "loss": 0.8342, + "epoch": 14.34, + "learning_rate": 1.9744318181818183e-05, + "loss": 0.3337, "step": 1269 }, { - "epoch": 25.7, - "learning_rate": 3.801020408163265e-05, - "loss": 0.8512, + "epoch": 14.35, + "learning_rate": 1.9602272727272728e-05, + "loss": 0.3384, "step": 1270 }, { - "epoch": 25.72, - "learning_rate": 3.788265306122449e-05, - "loss": 0.8393, + "epoch": 14.36, + "learning_rate": 1.9460227272727273e-05, + "loss": 0.3255, "step": 1271 }, { - "epoch": 25.74, - "learning_rate": 3.775510204081633e-05, - "loss": 0.8508, + "epoch": 14.37, + "learning_rate": 1.9318181818181818e-05, + "loss": 0.3568, "step": 1272 }, { - "epoch": 25.76, - "learning_rate": 3.7627551020408166e-05, - "loss": 0.9094, + "epoch": 14.38, + "learning_rate": 1.9176136363636366e-05, + "loss": 0.3427, "step": 1273 }, { - "epoch": 25.78, - "learning_rate": 3.7500000000000003e-05, - "loss": 0.9175, + "epoch": 14.39, + "learning_rate": 1.9034090909090908e-05, + "loss": 0.3468, "step": 1274 }, { - "epoch": 25.8, - "learning_rate": 3.737244897959184e-05, - "loss": 0.9179, + "epoch": 14.4, + "learning_rate": 1.8892045454545457e-05, + "loss": 0.3312, "step": 1275 }, { - "epoch": 25.82, - "learning_rate": 3.724489795918368e-05, - "loss": 0.869, + "epoch": 14.42, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.3376, "step": 1276 }, { - "epoch": 25.84, - "learning_rate": 3.711734693877551e-05, - "loss": 0.8568, + "epoch": 14.43, + "learning_rate": 1.8607954545454543e-05, + "loss": 0.3284, "step": 1277 }, { - "epoch": 25.86, - "learning_rate": 3.698979591836735e-05, - "loss": 0.9104, + "epoch": 14.44, + "learning_rate": 1.8465909090909092e-05, + "loss": 0.3378, "step": 1278 }, { - "epoch": 25.88, - "learning_rate": 3.6862244897959185e-05, - "loss": 0.8912, + "epoch": 14.45, + "learning_rate": 1.8323863636363637e-05, + "loss": 0.3331, "step": 1279 }, { - "epoch": 25.9, - "learning_rate": 3.673469387755102e-05, - "loss": 0.878, + "epoch": 14.46, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.3297, "step": 1280 }, { - "epoch": 25.92, - "learning_rate": 3.6607142857142853e-05, - "loss": 0.8711, + "epoch": 14.47, + "learning_rate": 1.8039772727272727e-05, + "loss": 0.3491, "step": 1281 }, { - "epoch": 25.94, - "learning_rate": 3.64795918367347e-05, - "loss": 0.9404, + "epoch": 14.48, + "learning_rate": 1.7897727272727276e-05, + "loss": 0.3354, "step": 1282 }, { - "epoch": 25.96, - "learning_rate": 3.6352040816326536e-05, - "loss": 0.9302, + "epoch": 14.49, + "learning_rate": 1.7755681818181817e-05, + "loss": 0.3292, "step": 1283 }, { - "epoch": 25.98, - "learning_rate": 3.622448979591837e-05, - "loss": 0.8907, + "epoch": 14.51, + "learning_rate": 1.7613636363636366e-05, + "loss": 0.3419, "step": 1284 }, { - "epoch": 26.0, - "learning_rate": 3.609693877551021e-05, - "loss": 0.8473, + "epoch": 14.52, + "learning_rate": 1.747159090909091e-05, + "loss": 0.329, "step": 1285 }, { - "epoch": 26.02, - "learning_rate": 3.596938775510204e-05, - "loss": 0.8482, + "epoch": 14.53, + "learning_rate": 1.7329545454545456e-05, + "loss": 0.345, "step": 1286 }, { - "epoch": 26.04, - "learning_rate": 3.584183673469388e-05, - "loss": 0.8683, + "epoch": 14.54, + "learning_rate": 1.71875e-05, + "loss": 0.3369, "step": 1287 }, { - "epoch": 26.06, - "learning_rate": 3.571428571428572e-05, - "loss": 0.8443, + "epoch": 14.55, + "learning_rate": 1.7045454545454546e-05, + "loss": 0.3537, "step": 1288 }, { - "epoch": 26.08, - "learning_rate": 3.5586734693877555e-05, - "loss": 0.8462, + "epoch": 14.56, + "learning_rate": 1.690340909090909e-05, + "loss": 0.3418, "step": 1289 }, { - "epoch": 26.1, - "learning_rate": 3.5459183673469385e-05, - "loss": 0.8204, + "epoch": 14.57, + "learning_rate": 1.6761363636363636e-05, + "loss": 0.3514, "step": 1290 }, { - "epoch": 26.12, - "learning_rate": 3.533163265306122e-05, - "loss": 0.8632, + "epoch": 14.58, + "learning_rate": 1.6619318181818185e-05, + "loss": 0.3325, "step": 1291 }, { - "epoch": 26.14, - "learning_rate": 3.520408163265306e-05, - "loss": 0.8883, + "epoch": 14.6, + "learning_rate": 1.6477272727272726e-05, + "loss": 0.3492, "step": 1292 }, { - "epoch": 26.16, - "learning_rate": 3.50765306122449e-05, - "loss": 0.8369, + "epoch": 14.61, + "learning_rate": 1.6335227272727275e-05, + "loss": 0.3439, "step": 1293 }, { - "epoch": 26.18, - "learning_rate": 3.4948979591836736e-05, - "loss": 0.8369, + "epoch": 14.62, + "learning_rate": 1.619318181818182e-05, + "loss": 0.3401, "step": 1294 }, { - "epoch": 26.2, - "learning_rate": 3.4821428571428574e-05, - "loss": 0.8506, + "epoch": 14.63, + "learning_rate": 1.6051136363636365e-05, + "loss": 0.3373, "step": 1295 }, { - "epoch": 26.22, - "learning_rate": 3.469387755102041e-05, - "loss": 0.839, + "epoch": 14.64, + "learning_rate": 1.590909090909091e-05, + "loss": 0.339, "step": 1296 }, { - "epoch": 26.24, - "learning_rate": 3.456632653061225e-05, - "loss": 0.8421, + "epoch": 14.65, + "learning_rate": 1.5767045454545455e-05, + "loss": 0.3229, "step": 1297 }, { - "epoch": 26.26, - "learning_rate": 3.443877551020409e-05, - "loss": 0.8292, + "epoch": 14.66, + "learning_rate": 1.5625e-05, + "loss": 0.3369, "step": 1298 }, { - "epoch": 26.28, - "learning_rate": 3.431122448979592e-05, - "loss": 0.8412, + "epoch": 14.68, + "learning_rate": 1.5482954545454545e-05, + "loss": 0.3295, "step": 1299 }, { - "epoch": 26.3, - "learning_rate": 3.4183673469387755e-05, - "loss": 0.8305, + "epoch": 14.69, + "learning_rate": 1.534090909090909e-05, + "loss": 0.3333, "step": 1300 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.9253105037500006e+18, + "total_flos": 1.7745797986691174e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-1300/training_args.bin b/checkpoint-1300/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-1300/training_args.bin +++ b/checkpoint-1300/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-1400/adapter_model.bin b/checkpoint-1400/adapter_model.bin index 4745aee31110084e19714506670916f3c62d1f94..9ce03b063e69bd157fb884b8d6d13fa2d3cbc9eb 100644 --- a/checkpoint-1400/adapter_model.bin +++ b/checkpoint-1400/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:019860e9604c2bce3e1eb82f3a576a7272afd222d602f5fba5c803b83043ee76 +oid sha256:e9fe5245989faeee0c2750a2146ba97c670136934dbaf55c59859a18ae47f724 size 39409357 diff --git a/checkpoint-1400/optimizer.pt b/checkpoint-1400/optimizer.pt index 1a5d1001a74ae6e7fcb4afa45ee820537e022650..2466949c474a16c07b671601ea3e3d28a117f673 100644 --- a/checkpoint-1400/optimizer.pt +++ b/checkpoint-1400/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dff811fb285c320d3235ecb226a73a05ba2c9fa3c8dcea292fb556c245eba3c7 +oid sha256:9aaac4c5de0c70c37dba56a1293e33680564febdb0dd9e90b3edc151a02b5c6f size 78844421 diff --git a/checkpoint-1400/rng_state.pth b/checkpoint-1400/rng_state.pth index 55458251cf9b9dd5303788f73cb2ddd459a88907..ce32862861da12da23bbb01ee9f59c5232cb111d 100644 --- a/checkpoint-1400/rng_state.pth +++ b/checkpoint-1400/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:feeeda336ac9687793442afff9ff589b66dc74c1c2b27bafbcd1072d4b5fa37d +oid sha256:51f3c17c1e7ce128318be2166e96b6dda4d1ff79c8b44e6fb06488bf0bbfcf8d size 14575 diff --git a/checkpoint-1400/scheduler.pt b/checkpoint-1400/scheduler.pt index 8c0bb19c7480c9e99af2b0ca613475fbdd4e6141..bc56c0fa05b89cade570ebaad0a9d0302f454057 100644 --- a/checkpoint-1400/scheduler.pt +++ b/checkpoint-1400/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc463b7a5519b8283bc8f09760b55dbcf1be2f24e13b19140789bdc09c3e1d5d +oid sha256:d2781c7d6f9147f112526f8c8a2660aa0c2e5cae1ef90b804b4446ea6776f625 size 627 diff --git a/checkpoint-1400/trainer_state.json b/checkpoint-1400/trainer_state.json index 8d441a440c3a045965abb373837074c2d4d12482..5d2cc80d3dd6c220ca130dd80d5976114a96a0e0 100644 --- a/checkpoint-1400/trainer_state.json +++ b/checkpoint-1400/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 28.327537148276953, + "epoch": 15.816416593115623, "eval_steps": 500, "global_step": 1400, "is_hyper_param_search": false, @@ -9,8411 +9,8411 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 }, { - "epoch": 16.21, - "learning_rate": 9.783163265306124e-05, - "loss": 1.19, + "epoch": 9.05, + "learning_rate": 8.62215909090909e-05, + "loss": 0.5247, "step": 801 }, { - "epoch": 16.23, - "learning_rate": 9.770408163265307e-05, - "loss": 1.1258, + "epoch": 9.06, + "learning_rate": 8.607954545454546e-05, + "loss": 0.5403, "step": 802 }, { - "epoch": 16.25, - "learning_rate": 9.75765306122449e-05, - "loss": 1.1765, + "epoch": 9.07, + "learning_rate": 8.593750000000001e-05, + "loss": 0.5252, "step": 803 }, { - "epoch": 16.27, - "learning_rate": 9.744897959183674e-05, - "loss": 1.1217, + "epoch": 9.08, + "learning_rate": 8.579545454545454e-05, + "loss": 0.5296, "step": 804 }, { - "epoch": 16.29, - "learning_rate": 9.732142857142858e-05, - "loss": 1.1293, + "epoch": 9.09, + "learning_rate": 8.56534090909091e-05, + "loss": 0.5223, "step": 805 }, { - "epoch": 16.31, - "learning_rate": 9.719387755102042e-05, - "loss": 1.17, + "epoch": 9.11, + "learning_rate": 8.551136363636364e-05, + "loss": 0.4972, "step": 806 }, { - "epoch": 16.33, - "learning_rate": 9.706632653061225e-05, - "loss": 1.17, + "epoch": 9.12, + "learning_rate": 8.536931818181818e-05, + "loss": 0.5005, "step": 807 }, { - "epoch": 16.35, - "learning_rate": 9.693877551020408e-05, - "loss": 1.2004, + "epoch": 9.13, + "learning_rate": 8.522727272727273e-05, + "loss": 0.5249, "step": 808 }, { - "epoch": 16.37, - "learning_rate": 9.681122448979593e-05, - "loss": 1.1648, + "epoch": 9.14, + "learning_rate": 8.508522727272728e-05, + "loss": 0.5135, "step": 809 }, { - "epoch": 16.39, - "learning_rate": 9.668367346938776e-05, - "loss": 1.0688, + "epoch": 9.15, + "learning_rate": 8.494318181818182e-05, + "loss": 0.5053, "step": 810 }, { - "epoch": 16.41, - "learning_rate": 9.655612244897959e-05, - "loss": 1.1607, + "epoch": 9.16, + "learning_rate": 8.480113636363637e-05, + "loss": 0.5158, "step": 811 }, { - "epoch": 16.43, - "learning_rate": 9.642857142857143e-05, - "loss": 1.1298, + "epoch": 9.17, + "learning_rate": 8.465909090909091e-05, + "loss": 0.5061, "step": 812 }, { - "epoch": 16.45, - "learning_rate": 9.630102040816326e-05, - "loss": 1.1064, + "epoch": 9.18, + "learning_rate": 8.451704545454546e-05, + "loss": 0.4988, "step": 813 }, { - "epoch": 16.47, - "learning_rate": 9.617346938775511e-05, - "loss": 1.1472, + "epoch": 9.2, + "learning_rate": 8.4375e-05, + "loss": 0.5273, "step": 814 }, { - "epoch": 16.49, - "learning_rate": 9.604591836734694e-05, - "loss": 1.1577, + "epoch": 9.21, + "learning_rate": 8.423295454545455e-05, + "loss": 0.5332, "step": 815 }, { - "epoch": 16.51, - "learning_rate": 9.591836734693878e-05, - "loss": 1.1436, + "epoch": 9.22, + "learning_rate": 8.40909090909091e-05, + "loss": 0.5181, "step": 816 }, { - "epoch": 16.53, - "learning_rate": 9.579081632653061e-05, - "loss": 1.1657, + "epoch": 9.23, + "learning_rate": 8.394886363636364e-05, + "loss": 0.5085, "step": 817 }, { - "epoch": 16.55, - "learning_rate": 9.566326530612246e-05, - "loss": 1.1147, + "epoch": 9.24, + "learning_rate": 8.380681818181818e-05, + "loss": 0.5137, "step": 818 }, { - "epoch": 16.57, - "learning_rate": 9.553571428571429e-05, - "loss": 1.1839, + "epoch": 9.25, + "learning_rate": 8.366477272727273e-05, + "loss": 0.5195, "step": 819 }, { - "epoch": 16.59, - "learning_rate": 9.540816326530613e-05, - "loss": 1.1298, + "epoch": 9.26, + "learning_rate": 8.352272727272727e-05, + "loss": 0.5077, "step": 820 }, { - "epoch": 16.61, - "learning_rate": 9.528061224489796e-05, - "loss": 1.2141, + "epoch": 9.28, + "learning_rate": 8.338068181818183e-05, + "loss": 0.5074, "step": 821 }, { - "epoch": 16.63, - "learning_rate": 9.515306122448981e-05, - "loss": 1.2045, + "epoch": 9.29, + "learning_rate": 8.323863636363637e-05, + "loss": 0.5142, "step": 822 }, { - "epoch": 16.65, - "learning_rate": 9.502551020408164e-05, - "loss": 1.1791, + "epoch": 9.3, + "learning_rate": 8.309659090909091e-05, + "loss": 0.5116, "step": 823 }, { - "epoch": 16.67, - "learning_rate": 9.489795918367348e-05, - "loss": 1.1137, + "epoch": 9.31, + "learning_rate": 8.295454545454547e-05, + "loss": 0.4974, "step": 824 }, { - "epoch": 16.69, - "learning_rate": 9.477040816326531e-05, - "loss": 1.1312, + "epoch": 9.32, + "learning_rate": 8.28125e-05, + "loss": 0.5117, "step": 825 }, { - "epoch": 16.71, - "learning_rate": 9.464285714285715e-05, - "loss": 1.1102, + "epoch": 9.33, + "learning_rate": 8.267045454545455e-05, + "loss": 0.5114, "step": 826 }, { - "epoch": 16.73, - "learning_rate": 9.451530612244899e-05, - "loss": 1.1865, + "epoch": 9.34, + "learning_rate": 8.25284090909091e-05, + "loss": 0.5039, "step": 827 }, { - "epoch": 16.75, - "learning_rate": 9.438775510204082e-05, - "loss": 1.1232, + "epoch": 9.35, + "learning_rate": 8.238636363636364e-05, + "loss": 0.498, "step": 828 }, { - "epoch": 16.77, - "learning_rate": 9.426020408163265e-05, - "loss": 1.2068, + "epoch": 9.37, + "learning_rate": 8.224431818181818e-05, + "loss": 0.5042, "step": 829 }, { - "epoch": 16.79, - "learning_rate": 9.41326530612245e-05, - "loss": 1.1864, + "epoch": 9.38, + "learning_rate": 8.210227272727274e-05, + "loss": 0.5049, "step": 830 }, { - "epoch": 16.81, - "learning_rate": 9.400510204081633e-05, - "loss": 1.2195, + "epoch": 9.39, + "learning_rate": 8.196022727272727e-05, + "loss": 0.5123, "step": 831 }, { - "epoch": 16.83, - "learning_rate": 9.387755102040817e-05, - "loss": 1.2063, + "epoch": 9.4, + "learning_rate": 8.181818181818183e-05, + "loss": 0.4907, "step": 832 }, { - "epoch": 16.85, - "learning_rate": 9.375e-05, - "loss": 1.1455, + "epoch": 9.41, + "learning_rate": 8.167613636363637e-05, + "loss": 0.5267, "step": 833 }, { - "epoch": 16.88, - "learning_rate": 9.362244897959183e-05, - "loss": 1.1819, + "epoch": 9.42, + "learning_rate": 8.15340909090909e-05, + "loss": 0.5314, "step": 834 }, { - "epoch": 16.9, - "learning_rate": 9.349489795918368e-05, - "loss": 1.1887, + "epoch": 9.43, + "learning_rate": 8.139204545454546e-05, + "loss": 0.4952, "step": 835 }, { - "epoch": 16.92, - "learning_rate": 9.336734693877551e-05, - "loss": 1.1557, + "epoch": 9.44, + "learning_rate": 8.125000000000001e-05, + "loss": 0.5014, "step": 836 }, { - "epoch": 16.94, - "learning_rate": 9.323979591836735e-05, - "loss": 1.2094, + "epoch": 9.46, + "learning_rate": 8.110795454545454e-05, + "loss": 0.4967, "step": 837 }, { - "epoch": 16.96, - "learning_rate": 9.311224489795918e-05, - "loss": 1.1512, + "epoch": 9.47, + "learning_rate": 8.09659090909091e-05, + "loss": 0.5116, "step": 838 }, { - "epoch": 16.98, - "learning_rate": 9.298469387755103e-05, - "loss": 1.1463, + "epoch": 9.48, + "learning_rate": 8.082386363636365e-05, + "loss": 0.5119, "step": 839 }, { - "epoch": 17.0, - "learning_rate": 9.285714285714286e-05, - "loss": 1.155, + "epoch": 9.49, + "learning_rate": 8.068181818181818e-05, + "loss": 0.4987, "step": 840 }, { - "epoch": 17.02, - "learning_rate": 9.27295918367347e-05, - "loss": 1.1292, + "epoch": 9.5, + "learning_rate": 8.053977272727274e-05, + "loss": 0.5063, "step": 841 }, { - "epoch": 17.04, - "learning_rate": 9.260204081632653e-05, - "loss": 1.0996, + "epoch": 9.51, + "learning_rate": 8.039772727272728e-05, + "loss": 0.5019, "step": 842 }, { - "epoch": 17.06, - "learning_rate": 9.247448979591838e-05, - "loss": 1.0662, + "epoch": 9.52, + "learning_rate": 8.025568181818183e-05, + "loss": 0.5272, "step": 843 }, { - "epoch": 17.08, - "learning_rate": 9.234693877551021e-05, - "loss": 1.0931, + "epoch": 9.54, + "learning_rate": 8.011363636363637e-05, + "loss": 0.4969, "step": 844 }, { - "epoch": 17.1, - "learning_rate": 9.221938775510205e-05, - "loss": 1.0727, + "epoch": 9.55, + "learning_rate": 7.997159090909092e-05, + "loss": 0.5222, "step": 845 }, { - "epoch": 17.12, - "learning_rate": 9.209183673469388e-05, - "loss": 1.1043, + "epoch": 9.56, + "learning_rate": 7.982954545454546e-05, + "loss": 0.4729, "step": 846 }, { - "epoch": 17.14, - "learning_rate": 9.196428571428572e-05, - "loss": 1.0594, + "epoch": 9.57, + "learning_rate": 7.96875e-05, + "loss": 0.4976, "step": 847 }, { - "epoch": 17.16, - "learning_rate": 9.183673469387756e-05, - "loss": 1.0952, + "epoch": 9.58, + "learning_rate": 7.954545454545455e-05, + "loss": 0.4974, "step": 848 }, { - "epoch": 17.18, - "learning_rate": 9.170918367346939e-05, - "loss": 1.0639, + "epoch": 9.59, + "learning_rate": 7.94034090909091e-05, + "loss": 0.4849, "step": 849 }, { - "epoch": 17.2, - "learning_rate": 9.158163265306124e-05, - "loss": 1.132, + "epoch": 9.6, + "learning_rate": 7.926136363636364e-05, + "loss": 0.4897, "step": 850 }, { - "epoch": 17.22, - "learning_rate": 9.145408163265307e-05, - "loss": 1.1083, + "epoch": 9.61, + "learning_rate": 7.911931818181819e-05, + "loss": 0.4962, "step": 851 }, { - "epoch": 17.24, - "learning_rate": 9.13265306122449e-05, - "loss": 1.1282, + "epoch": 9.63, + "learning_rate": 7.897727272727273e-05, + "loss": 0.4877, "step": 852 }, { - "epoch": 17.26, - "learning_rate": 9.119897959183674e-05, - "loss": 1.0474, + "epoch": 9.64, + "learning_rate": 7.883522727272728e-05, + "loss": 0.4921, "step": 853 }, { - "epoch": 17.28, - "learning_rate": 9.107142857142857e-05, - "loss": 1.1138, + "epoch": 9.65, + "learning_rate": 7.869318181818182e-05, + "loss": 0.4969, "step": 854 }, { - "epoch": 17.3, - "learning_rate": 9.094387755102042e-05, - "loss": 1.1025, + "epoch": 9.66, + "learning_rate": 7.855113636363637e-05, + "loss": 0.5045, "step": 855 }, { - "epoch": 17.32, - "learning_rate": 9.081632653061225e-05, - "loss": 1.0968, + "epoch": 9.67, + "learning_rate": 7.840909090909091e-05, + "loss": 0.5207, "step": 856 }, { - "epoch": 17.34, - "learning_rate": 9.068877551020408e-05, - "loss": 1.1683, + "epoch": 9.68, + "learning_rate": 7.826704545454546e-05, + "loss": 0.5098, "step": 857 }, { - "epoch": 17.36, - "learning_rate": 9.056122448979592e-05, - "loss": 1.0975, + "epoch": 9.69, + "learning_rate": 7.8125e-05, + "loss": 0.5005, "step": 858 }, { - "epoch": 17.38, - "learning_rate": 9.043367346938775e-05, - "loss": 1.1274, + "epoch": 9.7, + "learning_rate": 7.798295454545455e-05, + "loss": 0.5028, "step": 859 }, { - "epoch": 17.4, - "learning_rate": 9.030612244897958e-05, - "loss": 1.0916, + "epoch": 9.72, + "learning_rate": 7.784090909090909e-05, + "loss": 0.5067, "step": 860 }, { - "epoch": 17.42, - "learning_rate": 9.017857142857143e-05, - "loss": 1.0912, + "epoch": 9.73, + "learning_rate": 7.769886363636364e-05, + "loss": 0.484, "step": 861 }, { - "epoch": 17.44, - "learning_rate": 9.005102040816327e-05, - "loss": 1.0875, + "epoch": 9.74, + "learning_rate": 7.755681818181818e-05, + "loss": 0.5029, "step": 862 }, { - "epoch": 17.46, - "learning_rate": 8.99234693877551e-05, - "loss": 1.05, + "epoch": 9.75, + "learning_rate": 7.741477272727273e-05, + "loss": 0.5077, "step": 863 }, { - "epoch": 17.48, - "learning_rate": 8.979591836734695e-05, - "loss": 1.1418, + "epoch": 9.76, + "learning_rate": 7.727272727272727e-05, + "loss": 0.5091, "step": 864 }, { - "epoch": 17.5, - "learning_rate": 8.966836734693878e-05, - "loss": 1.0609, + "epoch": 9.77, + "learning_rate": 7.713068181818183e-05, + "loss": 0.4781, "step": 865 }, { - "epoch": 17.52, - "learning_rate": 8.954081632653062e-05, - "loss": 1.1611, + "epoch": 9.78, + "learning_rate": 7.698863636363636e-05, + "loss": 0.5124, "step": 866 }, { - "epoch": 17.54, - "learning_rate": 8.941326530612245e-05, - "loss": 1.1065, + "epoch": 9.79, + "learning_rate": 7.684659090909091e-05, + "loss": 0.4859, "step": 867 }, { - "epoch": 17.56, - "learning_rate": 8.92857142857143e-05, - "loss": 1.1611, + "epoch": 9.81, + "learning_rate": 7.670454545454547e-05, + "loss": 0.4872, "step": 868 }, { - "epoch": 17.58, - "learning_rate": 8.915816326530613e-05, - "loss": 1.1398, + "epoch": 9.82, + "learning_rate": 7.65625e-05, + "loss": 0.4675, "step": 869 }, { - "epoch": 17.6, - "learning_rate": 8.903061224489796e-05, - "loss": 1.1055, + "epoch": 9.83, + "learning_rate": 7.642045454545454e-05, + "loss": 0.5056, "step": 870 }, { - "epoch": 17.62, - "learning_rate": 8.89030612244898e-05, - "loss": 1.1314, + "epoch": 9.84, + "learning_rate": 7.62784090909091e-05, + "loss": 0.4868, "step": 871 }, { - "epoch": 17.64, - "learning_rate": 8.877551020408164e-05, - "loss": 1.1084, + "epoch": 9.85, + "learning_rate": 7.613636363636363e-05, + "loss": 0.4907, "step": 872 }, { - "epoch": 17.66, - "learning_rate": 8.864795918367348e-05, - "loss": 1.1254, + "epoch": 9.86, + "learning_rate": 7.599431818181818e-05, + "loss": 0.474, "step": 873 }, { - "epoch": 17.68, - "learning_rate": 8.852040816326531e-05, - "loss": 1.142, + "epoch": 9.87, + "learning_rate": 7.585227272727274e-05, + "loss": 0.4813, "step": 874 }, { - "epoch": 17.7, - "learning_rate": 8.839285714285714e-05, - "loss": 1.1371, + "epoch": 9.89, + "learning_rate": 7.571022727272727e-05, + "loss": 0.4838, "step": 875 }, { - "epoch": 17.72, - "learning_rate": 8.826530612244899e-05, - "loss": 1.1092, + "epoch": 9.9, + "learning_rate": 7.556818181818183e-05, + "loss": 0.4935, "step": 876 }, { - "epoch": 17.75, - "learning_rate": 8.813775510204082e-05, - "loss": 1.161, + "epoch": 9.91, + "learning_rate": 7.542613636363637e-05, + "loss": 0.4884, "step": 877 }, { - "epoch": 17.77, - "learning_rate": 8.801020408163265e-05, - "loss": 1.1044, + "epoch": 9.92, + "learning_rate": 7.52840909090909e-05, + "loss": 0.4797, "step": 878 }, { - "epoch": 17.79, - "learning_rate": 8.788265306122449e-05, - "loss": 1.117, + "epoch": 9.93, + "learning_rate": 7.514204545454546e-05, + "loss": 0.479, "step": 879 }, { - "epoch": 17.81, - "learning_rate": 8.775510204081632e-05, - "loss": 1.1262, + "epoch": 9.94, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4727, "step": 880 }, { - "epoch": 17.83, - "learning_rate": 8.762755102040817e-05, - "loss": 1.0829, + "epoch": 9.95, + "learning_rate": 7.485795454545454e-05, + "loss": 0.4758, "step": 881 }, { - "epoch": 17.85, - "learning_rate": 8.75e-05, - "loss": 1.1393, + "epoch": 9.96, + "learning_rate": 7.47159090909091e-05, + "loss": 0.482, "step": 882 }, { - "epoch": 17.87, - "learning_rate": 8.737244897959183e-05, - "loss": 1.1781, + "epoch": 9.98, + "learning_rate": 7.457386363636364e-05, + "loss": 0.4951, "step": 883 }, { - "epoch": 17.89, - "learning_rate": 8.724489795918367e-05, - "loss": 1.1582, + "epoch": 9.99, + "learning_rate": 7.443181818181817e-05, + "loss": 0.4823, "step": 884 }, { - "epoch": 17.91, - "learning_rate": 8.711734693877552e-05, - "loss": 1.1469, + "epoch": 10.0, + "learning_rate": 7.428977272727273e-05, + "loss": 0.4638, "step": 885 }, { - "epoch": 17.93, - "learning_rate": 8.698979591836735e-05, - "loss": 1.1494, + "epoch": 10.01, + "learning_rate": 7.414772727272728e-05, + "loss": 0.4715, "step": 886 }, { - "epoch": 17.95, - "learning_rate": 8.68622448979592e-05, - "loss": 1.1251, + "epoch": 10.02, + "learning_rate": 7.400568181818182e-05, + "loss": 0.461, "step": 887 }, { - "epoch": 17.97, - "learning_rate": 8.673469387755102e-05, - "loss": 1.1624, + "epoch": 10.03, + "learning_rate": 7.386363636363637e-05, + "loss": 0.4429, "step": 888 }, { - "epoch": 17.99, - "learning_rate": 8.660714285714287e-05, - "loss": 1.0842, + "epoch": 10.04, + "learning_rate": 7.372159090909091e-05, + "loss": 0.4403, "step": 889 }, { - "epoch": 18.01, - "learning_rate": 8.64795918367347e-05, - "loss": 1.1944, + "epoch": 10.05, + "learning_rate": 7.357954545454546e-05, + "loss": 0.4519, "step": 890 }, { - "epoch": 18.03, - "learning_rate": 8.635204081632653e-05, - "loss": 1.0642, + "epoch": 10.07, + "learning_rate": 7.34375e-05, + "loss": 0.4611, "step": 891 }, { - "epoch": 18.05, - "learning_rate": 8.622448979591838e-05, - "loss": 1.0459, + "epoch": 10.08, + "learning_rate": 7.329545454545455e-05, + "loss": 0.4543, "step": 892 }, { - "epoch": 18.07, - "learning_rate": 8.60969387755102e-05, - "loss": 1.0941, + "epoch": 10.09, + "learning_rate": 7.315340909090909e-05, + "loss": 0.4528, "step": 893 }, { - "epoch": 18.09, - "learning_rate": 8.596938775510205e-05, - "loss": 1.0457, + "epoch": 10.1, + "learning_rate": 7.301136363636364e-05, + "loss": 0.4586, "step": 894 }, { - "epoch": 18.11, - "learning_rate": 8.584183673469388e-05, - "loss": 1.1033, + "epoch": 10.11, + "learning_rate": 7.286931818181818e-05, + "loss": 0.4418, "step": 895 }, { - "epoch": 18.13, - "learning_rate": 8.571428571428571e-05, - "loss": 1.0756, + "epoch": 10.12, + "learning_rate": 7.272727272727273e-05, + "loss": 0.4435, "step": 896 }, { - "epoch": 18.15, - "learning_rate": 8.558673469387756e-05, - "loss": 1.0615, + "epoch": 10.13, + "learning_rate": 7.258522727272727e-05, + "loss": 0.44, "step": 897 }, { - "epoch": 18.17, - "learning_rate": 8.545918367346939e-05, - "loss": 1.0828, + "epoch": 10.15, + "learning_rate": 7.244318181818183e-05, + "loss": 0.4589, "step": 898 }, { - "epoch": 18.19, - "learning_rate": 8.533163265306123e-05, - "loss": 1.1158, + "epoch": 10.16, + "learning_rate": 7.230113636363636e-05, + "loss": 0.4597, "step": 899 }, { - "epoch": 18.21, - "learning_rate": 8.520408163265306e-05, - "loss": 1.0133, + "epoch": 10.17, + "learning_rate": 7.215909090909091e-05, + "loss": 0.4479, "step": 900 }, { - "epoch": 18.23, - "learning_rate": 8.50765306122449e-05, - "loss": 1.0437, + "epoch": 10.18, + "learning_rate": 7.201704545454547e-05, + "loss": 0.4477, "step": 901 }, { - "epoch": 18.25, - "learning_rate": 8.494897959183674e-05, - "loss": 1.0372, + "epoch": 10.19, + "learning_rate": 7.1875e-05, + "loss": 0.446, "step": 902 }, { - "epoch": 18.27, - "learning_rate": 8.482142857142857e-05, - "loss": 1.1012, + "epoch": 10.2, + "learning_rate": 7.173295454545454e-05, + "loss": 0.4546, "step": 903 }, { - "epoch": 18.29, - "learning_rate": 8.469387755102041e-05, - "loss": 1.0777, + "epoch": 10.21, + "learning_rate": 7.15909090909091e-05, + "loss": 0.4347, "step": 904 }, { - "epoch": 18.31, - "learning_rate": 8.456632653061224e-05, - "loss": 1.0799, + "epoch": 10.22, + "learning_rate": 7.144886363636363e-05, + "loss": 0.452, "step": 905 }, { - "epoch": 18.33, - "learning_rate": 8.443877551020409e-05, - "loss": 0.9846, + "epoch": 10.24, + "learning_rate": 7.130681818181818e-05, + "loss": 0.4536, "step": 906 }, { - "epoch": 18.35, - "learning_rate": 8.431122448979592e-05, - "loss": 1.1, + "epoch": 10.25, + "learning_rate": 7.116477272727274e-05, + "loss": 0.4492, "step": 907 }, { - "epoch": 18.37, - "learning_rate": 8.418367346938776e-05, - "loss": 1.0787, + "epoch": 10.26, + "learning_rate": 7.102272727272727e-05, + "loss": 0.4401, "step": 908 }, { - "epoch": 18.39, - "learning_rate": 8.40561224489796e-05, - "loss": 1.0647, + "epoch": 10.27, + "learning_rate": 7.088068181818183e-05, + "loss": 0.4609, "step": 909 }, { - "epoch": 18.41, - "learning_rate": 8.392857142857144e-05, - "loss": 1.056, + "epoch": 10.28, + "learning_rate": 7.073863636363637e-05, + "loss": 0.4544, "step": 910 }, { - "epoch": 18.43, - "learning_rate": 8.380102040816327e-05, - "loss": 1.1131, + "epoch": 10.29, + "learning_rate": 7.05965909090909e-05, + "loss": 0.4477, "step": 911 }, { - "epoch": 18.45, - "learning_rate": 8.367346938775511e-05, - "loss": 1.0825, + "epoch": 10.3, + "learning_rate": 7.045454545454546e-05, + "loss": 0.4445, "step": 912 }, { - "epoch": 18.47, - "learning_rate": 8.354591836734695e-05, - "loss": 1.0681, + "epoch": 10.31, + "learning_rate": 7.031250000000001e-05, + "loss": 0.4544, "step": 913 }, { - "epoch": 18.49, - "learning_rate": 8.341836734693878e-05, - "loss": 1.0479, + "epoch": 10.33, + "learning_rate": 7.017045454545454e-05, + "loss": 0.4634, "step": 914 }, { - "epoch": 18.51, - "learning_rate": 8.329081632653062e-05, - "loss": 1.0921, + "epoch": 10.34, + "learning_rate": 7.00284090909091e-05, + "loss": 0.4499, "step": 915 }, { - "epoch": 18.53, - "learning_rate": 8.316326530612245e-05, - "loss": 1.0626, + "epoch": 10.35, + "learning_rate": 6.988636363636364e-05, + "loss": 0.4354, "step": 916 }, { - "epoch": 18.55, - "learning_rate": 8.30357142857143e-05, - "loss": 1.0518, + "epoch": 10.36, + "learning_rate": 6.974431818181818e-05, + "loss": 0.454, "step": 917 }, { - "epoch": 18.57, - "learning_rate": 8.290816326530613e-05, - "loss": 1.0557, + "epoch": 10.37, + "learning_rate": 6.960227272727273e-05, + "loss": 0.4473, "step": 918 }, { - "epoch": 18.6, - "learning_rate": 8.278061224489796e-05, - "loss": 1.0831, + "epoch": 10.38, + "learning_rate": 6.946022727272728e-05, + "loss": 0.4347, "step": 919 }, { - "epoch": 18.62, - "learning_rate": 8.26530612244898e-05, - "loss": 1.0307, + "epoch": 10.39, + "learning_rate": 6.931818181818182e-05, + "loss": 0.441, "step": 920 }, { - "epoch": 18.64, - "learning_rate": 8.252551020408163e-05, - "loss": 1.0455, + "epoch": 10.4, + "learning_rate": 6.917613636363637e-05, + "loss": 0.4545, "step": 921 }, { - "epoch": 18.66, - "learning_rate": 8.239795918367348e-05, - "loss": 1.0667, + "epoch": 10.42, + "learning_rate": 6.903409090909091e-05, + "loss": 0.458, "step": 922 }, { - "epoch": 18.68, - "learning_rate": 8.227040816326531e-05, - "loss": 1.0736, + "epoch": 10.43, + "learning_rate": 6.889204545454546e-05, + "loss": 0.4381, "step": 923 }, { - "epoch": 18.7, - "learning_rate": 8.214285714285714e-05, - "loss": 1.0108, + "epoch": 10.44, + "learning_rate": 6.875e-05, + "loss": 0.441, "step": 924 }, { - "epoch": 18.72, - "learning_rate": 8.201530612244898e-05, - "loss": 1.0458, + "epoch": 10.45, + "learning_rate": 6.860795454545455e-05, + "loss": 0.4446, "step": 925 }, { - "epoch": 18.74, - "learning_rate": 8.188775510204081e-05, - "loss": 1.0852, + "epoch": 10.46, + "learning_rate": 6.84659090909091e-05, + "loss": 0.4548, "step": 926 }, { - "epoch": 18.76, - "learning_rate": 8.176020408163265e-05, - "loss": 1.1207, + "epoch": 10.47, + "learning_rate": 6.832386363636364e-05, + "loss": 0.4404, "step": 927 }, { - "epoch": 18.78, - "learning_rate": 8.163265306122449e-05, - "loss": 1.0914, + "epoch": 10.48, + "learning_rate": 6.818181818181818e-05, + "loss": 0.4446, "step": 928 }, { - "epoch": 18.8, - "learning_rate": 8.150510204081633e-05, - "loss": 1.1108, + "epoch": 10.5, + "learning_rate": 6.803977272727273e-05, + "loss": 0.4434, "step": 929 }, { - "epoch": 18.82, - "learning_rate": 8.137755102040817e-05, - "loss": 1.1394, + "epoch": 10.51, + "learning_rate": 6.789772727272727e-05, + "loss": 0.4778, "step": 930 }, { - "epoch": 18.84, - "learning_rate": 8.125000000000001e-05, - "loss": 1.029, + "epoch": 10.52, + "learning_rate": 6.775568181818182e-05, + "loss": 0.4356, "step": 931 }, { - "epoch": 18.86, - "learning_rate": 8.112244897959184e-05, - "loss": 1.0661, + "epoch": 10.53, + "learning_rate": 6.761363636363636e-05, + "loss": 0.4464, "step": 932 }, { - "epoch": 18.88, - "learning_rate": 8.099489795918369e-05, - "loss": 1.0303, + "epoch": 10.54, + "learning_rate": 6.747159090909091e-05, + "loss": 0.4387, "step": 933 }, { - "epoch": 18.9, - "learning_rate": 8.086734693877552e-05, - "loss": 1.1144, + "epoch": 10.55, + "learning_rate": 6.732954545454547e-05, + "loss": 0.456, "step": 934 }, { - "epoch": 18.92, - "learning_rate": 8.073979591836736e-05, - "loss": 1.1096, + "epoch": 10.56, + "learning_rate": 6.71875e-05, + "loss": 0.453, "step": 935 }, { - "epoch": 18.94, - "learning_rate": 8.061224489795919e-05, - "loss": 1.123, + "epoch": 10.57, + "learning_rate": 6.704545454545455e-05, + "loss": 0.4611, "step": 936 }, { - "epoch": 18.96, - "learning_rate": 8.048469387755102e-05, - "loss": 1.1002, + "epoch": 10.59, + "learning_rate": 6.69034090909091e-05, + "loss": 0.4354, "step": 937 }, { - "epoch": 18.98, - "learning_rate": 8.035714285714287e-05, - "loss": 1.1016, + "epoch": 10.6, + "learning_rate": 6.676136363636364e-05, + "loss": 0.4519, "step": 938 }, { - "epoch": 19.0, - "learning_rate": 8.02295918367347e-05, - "loss": 1.0847, + "epoch": 10.61, + "learning_rate": 6.661931818181818e-05, + "loss": 0.4435, "step": 939 }, { - "epoch": 19.02, - "learning_rate": 8.010204081632653e-05, - "loss": 1.1029, + "epoch": 10.62, + "learning_rate": 6.647727272727274e-05, + "loss": 0.4422, "step": 940 }, { - "epoch": 19.04, - "learning_rate": 7.997448979591837e-05, - "loss": 1.041, + "epoch": 10.63, + "learning_rate": 6.633522727272727e-05, + "loss": 0.4344, "step": 941 }, { - "epoch": 19.06, - "learning_rate": 7.98469387755102e-05, - "loss": 1.01, + "epoch": 10.64, + "learning_rate": 6.619318181818183e-05, + "loss": 0.4419, "step": 942 }, { - "epoch": 19.08, - "learning_rate": 7.971938775510205e-05, - "loss": 1.0197, + "epoch": 10.65, + "learning_rate": 6.605113636363637e-05, + "loss": 0.4308, "step": 943 }, { - "epoch": 19.1, - "learning_rate": 7.959183673469388e-05, - "loss": 1.0543, + "epoch": 10.66, + "learning_rate": 6.59090909090909e-05, + "loss": 0.4043, "step": 944 }, { - "epoch": 19.12, - "learning_rate": 7.946428571428571e-05, - "loss": 1.0369, + "epoch": 10.68, + "learning_rate": 6.576704545454546e-05, + "loss": 0.4626, "step": 945 }, { - "epoch": 19.14, - "learning_rate": 7.933673469387755e-05, - "loss": 1.0154, + "epoch": 10.69, + "learning_rate": 6.562500000000001e-05, + "loss": 0.4365, "step": 946 }, { - "epoch": 19.16, - "learning_rate": 7.920918367346939e-05, - "loss": 0.9546, + "epoch": 10.7, + "learning_rate": 6.548295454545454e-05, + "loss": 0.4397, "step": 947 }, { - "epoch": 19.18, - "learning_rate": 7.908163265306123e-05, - "loss": 0.9982, + "epoch": 10.71, + "learning_rate": 6.53409090909091e-05, + "loss": 0.4463, "step": 948 }, { - "epoch": 19.2, - "learning_rate": 7.895408163265306e-05, - "loss": 1.0748, + "epoch": 10.72, + "learning_rate": 6.519886363636364e-05, + "loss": 0.4394, "step": 949 }, { - "epoch": 19.22, - "learning_rate": 7.882653061224489e-05, - "loss": 1.0562, + "epoch": 10.73, + "learning_rate": 6.505681818181818e-05, + "loss": 0.45, "step": 950 }, { - "epoch": 19.24, - "learning_rate": 7.869897959183674e-05, - "loss": 1.0352, + "epoch": 10.74, + "learning_rate": 6.491477272727273e-05, + "loss": 0.4363, "step": 951 }, { - "epoch": 19.26, - "learning_rate": 7.857142857142858e-05, - "loss": 0.9976, + "epoch": 10.76, + "learning_rate": 6.477272727272728e-05, + "loss": 0.4566, "step": 952 }, { - "epoch": 19.28, - "learning_rate": 7.844387755102041e-05, - "loss": 1.0221, + "epoch": 10.77, + "learning_rate": 6.463068181818183e-05, + "loss": 0.4235, "step": 953 }, { - "epoch": 19.3, - "learning_rate": 7.831632653061226e-05, - "loss": 1.0119, + "epoch": 10.78, + "learning_rate": 6.448863636363637e-05, + "loss": 0.4458, "step": 954 }, { - "epoch": 19.32, - "learning_rate": 7.818877551020409e-05, - "loss": 1.0657, + "epoch": 10.79, + "learning_rate": 6.434659090909092e-05, + "loss": 0.423, "step": 955 }, { - "epoch": 19.34, - "learning_rate": 7.806122448979593e-05, - "loss": 0.9591, + "epoch": 10.8, + "learning_rate": 6.420454545454546e-05, + "loss": 0.445, "step": 956 }, { - "epoch": 19.36, - "learning_rate": 7.793367346938776e-05, - "loss": 1.0101, + "epoch": 10.81, + "learning_rate": 6.40625e-05, + "loss": 0.424, "step": 957 }, { - "epoch": 19.38, - "learning_rate": 7.780612244897959e-05, - "loss": 1.0453, + "epoch": 10.82, + "learning_rate": 6.392045454545455e-05, + "loss": 0.4224, "step": 958 }, { - "epoch": 19.4, - "learning_rate": 7.767857142857144e-05, - "loss": 1.0461, + "epoch": 10.83, + "learning_rate": 6.37784090909091e-05, + "loss": 0.4223, "step": 959 }, { - "epoch": 19.42, - "learning_rate": 7.755102040816327e-05, - "loss": 1.0959, + "epoch": 10.85, + "learning_rate": 6.363636363636364e-05, + "loss": 0.4314, "step": 960 }, { - "epoch": 19.44, - "learning_rate": 7.742346938775511e-05, - "loss": 1.0608, + "epoch": 10.86, + "learning_rate": 6.349431818181819e-05, + "loss": 0.4488, "step": 961 }, { - "epoch": 19.47, - "learning_rate": 7.729591836734694e-05, - "loss": 1.1177, + "epoch": 10.87, + "learning_rate": 6.335227272727273e-05, + "loss": 0.423, "step": 962 }, { - "epoch": 19.49, - "learning_rate": 7.716836734693877e-05, - "loss": 1.0354, + "epoch": 10.88, + "learning_rate": 6.321022727272728e-05, + "loss": 0.4416, "step": 963 }, { - "epoch": 19.51, - "learning_rate": 7.704081632653062e-05, - "loss": 1.0507, + "epoch": 10.89, + "learning_rate": 6.306818181818182e-05, + "loss": 0.423, "step": 964 }, { - "epoch": 19.53, - "learning_rate": 7.691326530612245e-05, - "loss": 1.0313, + "epoch": 10.9, + "learning_rate": 6.292613636363637e-05, + "loss": 0.4502, "step": 965 }, { - "epoch": 19.55, - "learning_rate": 7.67857142857143e-05, - "loss": 1.0569, + "epoch": 10.91, + "learning_rate": 6.278409090909091e-05, + "loss": 0.4266, "step": 966 }, { - "epoch": 19.57, - "learning_rate": 7.665816326530612e-05, - "loss": 1.0862, + "epoch": 10.92, + "learning_rate": 6.264204545454546e-05, + "loss": 0.4344, "step": 967 }, { - "epoch": 19.59, - "learning_rate": 7.653061224489796e-05, - "loss": 1.0593, + "epoch": 10.94, + "learning_rate": 6.25e-05, + "loss": 0.434, "step": 968 }, { - "epoch": 19.61, - "learning_rate": 7.64030612244898e-05, - "loss": 1.0602, + "epoch": 10.95, + "learning_rate": 6.235795454545455e-05, + "loss": 0.4269, "step": 969 }, { - "epoch": 19.63, - "learning_rate": 7.627551020408163e-05, - "loss": 1.0048, + "epoch": 10.96, + "learning_rate": 6.221590909090909e-05, + "loss": 0.4158, "step": 970 }, { - "epoch": 19.65, - "learning_rate": 7.614795918367347e-05, - "loss": 1.0346, + "epoch": 10.97, + "learning_rate": 6.207386363636364e-05, + "loss": 0.4231, "step": 971 }, { - "epoch": 19.67, - "learning_rate": 7.60204081632653e-05, - "loss": 1.0172, + "epoch": 10.98, + "learning_rate": 6.193181818181818e-05, + "loss": 0.4235, "step": 972 }, { - "epoch": 19.69, - "learning_rate": 7.589285714285714e-05, - "loss": 1.02, + "epoch": 10.99, + "learning_rate": 6.178977272727273e-05, + "loss": 0.4504, "step": 973 }, { - "epoch": 19.71, - "learning_rate": 7.576530612244898e-05, - "loss": 1.0028, + "epoch": 11.0, + "learning_rate": 6.164772727272727e-05, + "loss": 0.4394, "step": 974 }, { - "epoch": 19.73, - "learning_rate": 7.563775510204083e-05, - "loss": 1.08, + "epoch": 11.02, + "learning_rate": 6.150568181818183e-05, + "loss": 0.4333, "step": 975 }, { - "epoch": 19.75, - "learning_rate": 7.551020408163266e-05, - "loss": 1.0402, + "epoch": 11.03, + "learning_rate": 6.136363636363636e-05, + "loss": 0.3936, "step": 976 }, { - "epoch": 19.77, - "learning_rate": 7.53826530612245e-05, - "loss": 1.0567, + "epoch": 11.04, + "learning_rate": 6.122159090909091e-05, + "loss": 0.3933, "step": 977 }, { - "epoch": 19.79, - "learning_rate": 7.525510204081633e-05, - "loss": 1.0169, + "epoch": 11.05, + "learning_rate": 6.107954545454547e-05, + "loss": 0.4161, "step": 978 }, { - "epoch": 19.81, - "learning_rate": 7.512755102040818e-05, - "loss": 0.9881, + "epoch": 11.06, + "learning_rate": 6.0937500000000004e-05, + "loss": 0.4097, "step": 979 }, { - "epoch": 19.83, - "learning_rate": 7.500000000000001e-05, - "loss": 1.0677, + "epoch": 11.07, + "learning_rate": 6.079545454545454e-05, + "loss": 0.412, "step": 980 }, { - "epoch": 19.85, - "learning_rate": 7.487244897959184e-05, - "loss": 1.1026, + "epoch": 11.08, + "learning_rate": 6.0653409090909094e-05, + "loss": 0.4104, "step": 981 }, { - "epoch": 19.87, - "learning_rate": 7.474489795918368e-05, - "loss": 1.0101, + "epoch": 11.09, + "learning_rate": 6.051136363636364e-05, + "loss": 0.4152, "step": 982 }, { - "epoch": 19.89, - "learning_rate": 7.461734693877551e-05, - "loss": 1.069, + "epoch": 11.11, + "learning_rate": 6.036931818181818e-05, + "loss": 0.4037, "step": 983 }, { - "epoch": 19.91, - "learning_rate": 7.448979591836736e-05, - "loss": 1.0493, + "epoch": 11.12, + "learning_rate": 6.022727272727273e-05, + "loss": 0.413, "step": 984 }, { - "epoch": 19.93, - "learning_rate": 7.436224489795919e-05, - "loss": 1.0858, + "epoch": 11.13, + "learning_rate": 6.0085227272727274e-05, + "loss": 0.4413, "step": 985 }, { - "epoch": 19.95, - "learning_rate": 7.423469387755102e-05, - "loss": 1.0734, + "epoch": 11.14, + "learning_rate": 5.9943181818181826e-05, + "loss": 0.3908, "step": 986 }, { - "epoch": 19.97, - "learning_rate": 7.410714285714286e-05, - "loss": 1.0203, + "epoch": 11.15, + "learning_rate": 5.9801136363636365e-05, + "loss": 0.3982, "step": 987 }, { - "epoch": 19.99, - "learning_rate": 7.39795918367347e-05, - "loss": 1.0285, + "epoch": 11.16, + "learning_rate": 5.965909090909091e-05, + "loss": 0.4109, "step": 988 }, { - "epoch": 20.01, - "learning_rate": 7.385204081632653e-05, - "loss": 0.9446, + "epoch": 11.17, + "learning_rate": 5.951704545454546e-05, + "loss": 0.3923, "step": 989 }, { - "epoch": 20.03, - "learning_rate": 7.372448979591837e-05, - "loss": 0.9915, + "epoch": 11.18, + "learning_rate": 5.9375e-05, + "loss": 0.4107, "step": 990 }, { - "epoch": 20.05, - "learning_rate": 7.35969387755102e-05, - "loss": 0.9882, + "epoch": 11.2, + "learning_rate": 5.9232954545454545e-05, + "loss": 0.4099, "step": 991 }, { - "epoch": 20.07, - "learning_rate": 7.346938775510205e-05, - "loss": 0.9338, + "epoch": 11.21, + "learning_rate": 5.90909090909091e-05, + "loss": 0.4163, "step": 992 }, { - "epoch": 20.09, - "learning_rate": 7.334183673469388e-05, - "loss": 0.942, + "epoch": 11.22, + "learning_rate": 5.8948863636363635e-05, + "loss": 0.4189, "step": 993 }, { - "epoch": 20.11, - "learning_rate": 7.321428571428571e-05, - "loss": 0.9725, + "epoch": 11.23, + "learning_rate": 5.880681818181818e-05, + "loss": 0.3889, "step": 994 }, { - "epoch": 20.13, - "learning_rate": 7.308673469387755e-05, - "loss": 1.027, + "epoch": 11.24, + "learning_rate": 5.866477272727273e-05, + "loss": 0.3988, "step": 995 }, { - "epoch": 20.15, - "learning_rate": 7.29591836734694e-05, - "loss": 1.0081, + "epoch": 11.25, + "learning_rate": 5.852272727272727e-05, + "loss": 0.4215, "step": 996 }, { - "epoch": 20.17, - "learning_rate": 7.283163265306123e-05, - "loss": 1.0117, + "epoch": 11.26, + "learning_rate": 5.838068181818183e-05, + "loss": 0.4207, "step": 997 }, { - "epoch": 20.19, - "learning_rate": 7.270408163265307e-05, - "loss": 0.969, + "epoch": 11.27, + "learning_rate": 5.823863636363637e-05, + "loss": 0.413, "step": 998 }, { - "epoch": 20.21, - "learning_rate": 7.25765306122449e-05, - "loss": 1.0024, + "epoch": 11.29, + "learning_rate": 5.8096590909090906e-05, + "loss": 0.4057, "step": 999 }, { - "epoch": 20.23, - "learning_rate": 7.244897959183675e-05, - "loss": 0.994, + "epoch": 11.3, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.3939, "step": 1000 }, { - "epoch": 20.25, - "learning_rate": 7.232142857142858e-05, - "loss": 1.0248, + "epoch": 11.31, + "learning_rate": 5.78125e-05, + "loss": 0.4199, "step": 1001 }, { - "epoch": 20.27, - "learning_rate": 7.219387755102042e-05, - "loss": 1.0493, + "epoch": 11.32, + "learning_rate": 5.767045454545454e-05, + "loss": 0.4076, "step": 1002 }, { - "epoch": 20.29, - "learning_rate": 7.206632653061225e-05, - "loss": 1.0011, + "epoch": 11.33, + "learning_rate": 5.75284090909091e-05, + "loss": 0.4079, "step": 1003 }, { - "epoch": 20.31, - "learning_rate": 7.193877551020408e-05, - "loss": 0.9874, + "epoch": 11.34, + "learning_rate": 5.738636363636364e-05, + "loss": 0.4002, "step": 1004 }, { - "epoch": 20.34, - "learning_rate": 7.181122448979593e-05, - "loss": 1.0049, + "epoch": 11.35, + "learning_rate": 5.724431818181818e-05, + "loss": 0.3801, "step": 1005 }, { - "epoch": 20.36, - "learning_rate": 7.168367346938776e-05, - "loss": 1.0314, + "epoch": 11.37, + "learning_rate": 5.7102272727272735e-05, + "loss": 0.3939, "step": 1006 }, { - "epoch": 20.38, - "learning_rate": 7.155612244897959e-05, - "loss": 0.9742, + "epoch": 11.38, + "learning_rate": 5.696022727272727e-05, + "loss": 0.3904, "step": 1007 }, { - "epoch": 20.4, - "learning_rate": 7.142857142857143e-05, - "loss": 1.0621, + "epoch": 11.39, + "learning_rate": 5.6818181818181825e-05, + "loss": 0.406, "step": 1008 }, { - "epoch": 20.42, - "learning_rate": 7.130102040816326e-05, - "loss": 0.9672, + "epoch": 11.4, + "learning_rate": 5.667613636363637e-05, + "loss": 0.4185, "step": 1009 }, { - "epoch": 20.44, - "learning_rate": 7.117346938775511e-05, - "loss": 1.0018, + "epoch": 11.41, + "learning_rate": 5.653409090909091e-05, + "loss": 0.3976, "step": 1010 }, { - "epoch": 20.46, - "learning_rate": 7.104591836734694e-05, - "loss": 1.0045, + "epoch": 11.42, + "learning_rate": 5.639204545454546e-05, + "loss": 0.3907, "step": 1011 }, { - "epoch": 20.48, - "learning_rate": 7.091836734693877e-05, - "loss": 0.9675, + "epoch": 11.43, + "learning_rate": 5.6250000000000005e-05, + "loss": 0.4065, "step": 1012 }, { - "epoch": 20.5, - "learning_rate": 7.079081632653062e-05, - "loss": 0.976, + "epoch": 11.44, + "learning_rate": 5.6107954545454544e-05, + "loss": 0.4069, "step": 1013 }, { - "epoch": 20.52, - "learning_rate": 7.066326530612245e-05, - "loss": 1.0523, + "epoch": 11.46, + "learning_rate": 5.5965909090909095e-05, + "loss": 0.3964, "step": 1014 }, { - "epoch": 20.54, - "learning_rate": 7.053571428571429e-05, - "loss": 1.052, + "epoch": 11.47, + "learning_rate": 5.582386363636364e-05, + "loss": 0.3912, "step": 1015 }, { - "epoch": 20.56, - "learning_rate": 7.040816326530612e-05, - "loss": 0.9903, + "epoch": 11.48, + "learning_rate": 5.568181818181818e-05, + "loss": 0.3944, "step": 1016 }, { - "epoch": 20.58, - "learning_rate": 7.028061224489795e-05, - "loss": 1.0337, + "epoch": 11.49, + "learning_rate": 5.553977272727273e-05, + "loss": 0.4197, "step": 1017 }, { - "epoch": 20.6, - "learning_rate": 7.01530612244898e-05, - "loss": 1.1122, + "epoch": 11.5, + "learning_rate": 5.5397727272727276e-05, + "loss": 0.4064, "step": 1018 }, { - "epoch": 20.62, - "learning_rate": 7.002551020408164e-05, - "loss": 1.0133, + "epoch": 11.51, + "learning_rate": 5.525568181818183e-05, + "loss": 0.4054, "step": 1019 }, { - "epoch": 20.64, - "learning_rate": 6.989795918367347e-05, - "loss": 0.9588, + "epoch": 11.52, + "learning_rate": 5.5113636363636366e-05, + "loss": 0.4128, "step": 1020 }, { - "epoch": 20.66, - "learning_rate": 6.977040816326532e-05, - "loss": 0.9892, + "epoch": 11.53, + "learning_rate": 5.497159090909091e-05, + "loss": 0.3976, "step": 1021 }, { - "epoch": 20.68, - "learning_rate": 6.964285714285715e-05, - "loss": 1.025, + "epoch": 11.55, + "learning_rate": 5.482954545454546e-05, + "loss": 0.3863, "step": 1022 }, { - "epoch": 20.7, - "learning_rate": 6.951530612244899e-05, - "loss": 1.0196, + "epoch": 11.56, + "learning_rate": 5.46875e-05, + "loss": 0.3994, "step": 1023 }, { - "epoch": 20.72, - "learning_rate": 6.938775510204082e-05, - "loss": 1.0146, + "epoch": 11.57, + "learning_rate": 5.4545454545454546e-05, + "loss": 0.401, "step": 1024 }, { - "epoch": 20.74, - "learning_rate": 6.926020408163265e-05, - "loss": 1.0656, + "epoch": 11.58, + "learning_rate": 5.44034090909091e-05, + "loss": 0.3948, "step": 1025 }, { - "epoch": 20.76, - "learning_rate": 6.91326530612245e-05, - "loss": 0.9584, + "epoch": 11.59, + "learning_rate": 5.4261363636363636e-05, + "loss": 0.3967, "step": 1026 }, { - "epoch": 20.78, - "learning_rate": 6.900510204081633e-05, - "loss": 0.9877, + "epoch": 11.6, + "learning_rate": 5.411931818181818e-05, + "loss": 0.413, "step": 1027 }, { - "epoch": 20.8, - "learning_rate": 6.887755102040817e-05, - "loss": 1.0607, + "epoch": 11.61, + "learning_rate": 5.397727272727273e-05, + "loss": 0.4032, "step": 1028 }, { - "epoch": 20.82, - "learning_rate": 6.875e-05, - "loss": 0.9969, + "epoch": 11.63, + "learning_rate": 5.383522727272727e-05, + "loss": 0.3905, "step": 1029 }, { - "epoch": 20.84, - "learning_rate": 6.862244897959184e-05, - "loss": 0.9506, + "epoch": 11.64, + "learning_rate": 5.3693181818181823e-05, + "loss": 0.4041, "step": 1030 }, { - "epoch": 20.86, - "learning_rate": 6.849489795918368e-05, - "loss": 1.0576, + "epoch": 11.65, + "learning_rate": 5.355113636363637e-05, + "loss": 0.392, "step": 1031 }, { - "epoch": 20.88, - "learning_rate": 6.836734693877551e-05, - "loss": 1.0094, + "epoch": 11.66, + "learning_rate": 5.340909090909091e-05, + "loss": 0.3942, "step": 1032 }, { - "epoch": 20.9, - "learning_rate": 6.823979591836735e-05, - "loss": 0.9872, + "epoch": 11.67, + "learning_rate": 5.326704545454546e-05, + "loss": 0.3946, "step": 1033 }, { - "epoch": 20.92, - "learning_rate": 6.811224489795919e-05, - "loss": 1.0544, + "epoch": 11.68, + "learning_rate": 5.3125000000000004e-05, + "loss": 0.3989, "step": 1034 }, { - "epoch": 20.94, - "learning_rate": 6.798469387755102e-05, - "loss": 1.0194, + "epoch": 11.69, + "learning_rate": 5.298295454545454e-05, + "loss": 0.4101, "step": 1035 }, { - "epoch": 20.96, - "learning_rate": 6.785714285714286e-05, - "loss": 1.0009, + "epoch": 11.7, + "learning_rate": 5.2840909090909094e-05, + "loss": 0.4033, "step": 1036 }, { - "epoch": 20.98, - "learning_rate": 6.772959183673469e-05, - "loss": 0.9727, + "epoch": 11.72, + "learning_rate": 5.269886363636364e-05, + "loss": 0.3937, "step": 1037 }, { - "epoch": 21.0, - "learning_rate": 6.760204081632652e-05, - "loss": 0.9754, + "epoch": 11.73, + "learning_rate": 5.255681818181818e-05, + "loss": 0.3873, "step": 1038 }, { - "epoch": 21.02, - "learning_rate": 6.747448979591837e-05, - "loss": 0.9953, + "epoch": 11.74, + "learning_rate": 5.241477272727273e-05, + "loss": 0.3922, "step": 1039 }, { - "epoch": 21.04, - "learning_rate": 6.73469387755102e-05, - "loss": 0.9307, + "epoch": 11.75, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.4016, "step": 1040 }, { - "epoch": 21.06, - "learning_rate": 6.721938775510204e-05, - "loss": 0.9151, + "epoch": 11.76, + "learning_rate": 5.2130681818181826e-05, + "loss": 0.3892, "step": 1041 }, { - "epoch": 21.08, - "learning_rate": 6.709183673469389e-05, - "loss": 0.9474, + "epoch": 11.77, + "learning_rate": 5.1988636363636364e-05, + "loss": 0.3974, "step": 1042 }, { - "epoch": 21.1, - "learning_rate": 6.696428571428572e-05, - "loss": 0.9697, + "epoch": 11.78, + "learning_rate": 5.184659090909091e-05, + "loss": 0.4024, "step": 1043 }, { - "epoch": 21.12, - "learning_rate": 6.683673469387756e-05, - "loss": 0.9423, + "epoch": 11.79, + "learning_rate": 5.170454545454546e-05, + "loss": 0.3889, "step": 1044 }, { - "epoch": 21.14, - "learning_rate": 6.670918367346939e-05, - "loss": 0.9797, + "epoch": 11.81, + "learning_rate": 5.15625e-05, + "loss": 0.4097, "step": 1045 }, { - "epoch": 21.16, - "learning_rate": 6.658163265306124e-05, - "loss": 0.919, + "epoch": 11.82, + "learning_rate": 5.1420454545454545e-05, + "loss": 0.4047, "step": 1046 }, { - "epoch": 21.18, - "learning_rate": 6.645408163265307e-05, - "loss": 0.9743, + "epoch": 11.83, + "learning_rate": 5.12784090909091e-05, + "loss": 0.4033, "step": 1047 }, { - "epoch": 21.21, - "learning_rate": 6.63265306122449e-05, - "loss": 0.9575, + "epoch": 11.84, + "learning_rate": 5.1136363636363635e-05, + "loss": 0.3774, "step": 1048 }, { - "epoch": 21.23, - "learning_rate": 6.619897959183674e-05, - "loss": 0.9861, + "epoch": 11.85, + "learning_rate": 5.099431818181818e-05, + "loss": 0.405, "step": 1049 }, { - "epoch": 21.25, - "learning_rate": 6.607142857142857e-05, - "loss": 0.9103, + "epoch": 11.86, + "learning_rate": 5.085227272727273e-05, + "loss": 0.3996, "step": 1050 }, { - "epoch": 21.27, - "learning_rate": 6.594387755102042e-05, - "loss": 0.993, + "epoch": 11.87, + "learning_rate": 5.071022727272727e-05, + "loss": 0.3885, "step": 1051 }, { - "epoch": 21.29, - "learning_rate": 6.581632653061225e-05, - "loss": 0.9668, + "epoch": 11.88, + "learning_rate": 5.056818181818183e-05, + "loss": 0.3914, "step": 1052 }, { - "epoch": 21.31, - "learning_rate": 6.568877551020408e-05, - "loss": 1.0008, + "epoch": 11.9, + "learning_rate": 5.042613636363637e-05, + "loss": 0.3908, "step": 1053 }, { - "epoch": 21.33, - "learning_rate": 6.556122448979592e-05, - "loss": 0.9825, + "epoch": 11.91, + "learning_rate": 5.0284090909090905e-05, + "loss": 0.3921, "step": 1054 }, { - "epoch": 21.35, - "learning_rate": 6.543367346938776e-05, - "loss": 1.0174, + "epoch": 11.92, + "learning_rate": 5.0142045454545464e-05, + "loss": 0.4077, "step": 1055 }, { - "epoch": 21.37, - "learning_rate": 6.530612244897959e-05, - "loss": 0.9685, + "epoch": 11.93, + "learning_rate": 5e-05, + "loss": 0.3973, "step": 1056 }, { - "epoch": 21.39, - "learning_rate": 6.517857142857143e-05, - "loss": 0.9265, + "epoch": 11.94, + "learning_rate": 4.985795454545455e-05, + "loss": 0.3986, "step": 1057 }, { - "epoch": 21.41, - "learning_rate": 6.505102040816326e-05, - "loss": 0.9495, + "epoch": 11.95, + "learning_rate": 4.971590909090909e-05, + "loss": 0.3938, "step": 1058 }, { - "epoch": 21.43, - "learning_rate": 6.49234693877551e-05, - "loss": 0.9541, + "epoch": 11.96, + "learning_rate": 4.957386363636364e-05, + "loss": 0.3897, "step": 1059 }, { - "epoch": 21.45, - "learning_rate": 6.479591836734694e-05, - "loss": 0.9299, + "epoch": 11.98, + "learning_rate": 4.943181818181818e-05, + "loss": 0.3965, "step": 1060 }, { - "epoch": 21.47, - "learning_rate": 6.466836734693877e-05, - "loss": 0.9625, + "epoch": 11.99, + "learning_rate": 4.9289772727272735e-05, + "loss": 0.3999, "step": 1061 }, { - "epoch": 21.49, - "learning_rate": 6.454081632653061e-05, - "loss": 1.0054, + "epoch": 12.0, + "learning_rate": 4.914772727272727e-05, + "loss": 0.3814, "step": 1062 }, { - "epoch": 21.51, - "learning_rate": 6.441326530612244e-05, - "loss": 0.9893, + "epoch": 12.01, + "learning_rate": 4.900568181818182e-05, + "loss": 0.3879, "step": 1063 }, { - "epoch": 21.53, - "learning_rate": 6.428571428571429e-05, - "loss": 0.9906, + "epoch": 12.02, + "learning_rate": 4.886363636363637e-05, + "loss": 0.3768, "step": 1064 }, { - "epoch": 21.55, - "learning_rate": 6.415816326530613e-05, - "loss": 0.9487, + "epoch": 12.03, + "learning_rate": 4.8721590909090915e-05, + "loss": 0.3813, "step": 1065 }, { - "epoch": 21.57, - "learning_rate": 6.403061224489796e-05, - "loss": 0.9728, + "epoch": 12.04, + "learning_rate": 4.857954545454545e-05, + "loss": 0.3759, "step": 1066 }, { - "epoch": 21.59, - "learning_rate": 6.390306122448981e-05, - "loss": 0.9883, + "epoch": 12.05, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.3817, "step": 1067 }, { - "epoch": 21.61, - "learning_rate": 6.377551020408164e-05, - "loss": 1.053, + "epoch": 12.07, + "learning_rate": 4.829545454545455e-05, + "loss": 0.3773, "step": 1068 }, { - "epoch": 21.63, - "learning_rate": 6.364795918367348e-05, - "loss": 1.012, + "epoch": 12.08, + "learning_rate": 4.815340909090909e-05, + "loss": 0.3807, "step": 1069 }, { - "epoch": 21.65, - "learning_rate": 6.352040816326531e-05, - "loss": 0.962, + "epoch": 12.09, + "learning_rate": 4.801136363636364e-05, + "loss": 0.3757, "step": 1070 }, { - "epoch": 21.67, - "learning_rate": 6.339285714285714e-05, - "loss": 0.9955, + "epoch": 12.1, + "learning_rate": 4.7869318181818185e-05, + "loss": 0.3819, "step": 1071 }, { - "epoch": 21.69, - "learning_rate": 6.326530612244899e-05, - "loss": 0.9908, + "epoch": 12.11, + "learning_rate": 4.772727272727273e-05, + "loss": 0.3731, "step": 1072 }, { - "epoch": 21.71, - "learning_rate": 6.313775510204082e-05, - "loss": 1.0327, + "epoch": 12.12, + "learning_rate": 4.7585227272727276e-05, + "loss": 0.3706, "step": 1073 }, { - "epoch": 21.73, - "learning_rate": 6.301020408163265e-05, - "loss": 0.9255, + "epoch": 12.13, + "learning_rate": 4.744318181818182e-05, + "loss": 0.3762, "step": 1074 }, { - "epoch": 21.75, - "learning_rate": 6.28826530612245e-05, - "loss": 0.9268, + "epoch": 12.14, + "learning_rate": 4.7301136363636366e-05, + "loss": 0.3749, "step": 1075 }, { - "epoch": 21.77, - "learning_rate": 6.275510204081633e-05, - "loss": 0.9204, + "epoch": 12.16, + "learning_rate": 4.715909090909091e-05, + "loss": 0.3884, "step": 1076 }, { - "epoch": 21.79, - "learning_rate": 6.262755102040817e-05, - "loss": 0.9838, + "epoch": 12.17, + "learning_rate": 4.7017045454545456e-05, + "loss": 0.373, "step": 1077 }, { - "epoch": 21.81, - "learning_rate": 6.25e-05, - "loss": 0.954, + "epoch": 12.18, + "learning_rate": 4.6875e-05, + "loss": 0.374, "step": 1078 }, { - "epoch": 21.83, - "learning_rate": 6.237244897959183e-05, - "loss": 1.0102, + "epoch": 12.19, + "learning_rate": 4.6732954545454546e-05, + "loss": 0.3813, "step": 1079 }, { - "epoch": 21.85, - "learning_rate": 6.224489795918368e-05, - "loss": 0.916, + "epoch": 12.2, + "learning_rate": 4.659090909090909e-05, + "loss": 0.3745, "step": 1080 }, { - "epoch": 21.87, - "learning_rate": 6.211734693877551e-05, - "loss": 0.9939, + "epoch": 12.21, + "learning_rate": 4.6448863636363636e-05, + "loss": 0.3646, "step": 1081 }, { - "epoch": 21.89, - "learning_rate": 6.198979591836735e-05, - "loss": 0.9675, + "epoch": 12.22, + "learning_rate": 4.630681818181818e-05, + "loss": 0.3729, "step": 1082 }, { - "epoch": 21.91, - "learning_rate": 6.186224489795918e-05, - "loss": 0.9666, + "epoch": 12.24, + "learning_rate": 4.616477272727273e-05, + "loss": 0.3701, "step": 1083 }, { - "epoch": 21.93, - "learning_rate": 6.173469387755101e-05, - "loss": 0.9919, + "epoch": 12.25, + "learning_rate": 4.602272727272727e-05, + "loss": 0.3655, "step": 1084 }, { - "epoch": 21.95, - "learning_rate": 6.160714285714286e-05, - "loss": 1.0106, + "epoch": 12.26, + "learning_rate": 4.5880681818181817e-05, + "loss": 0.3758, "step": 1085 }, { - "epoch": 21.97, - "learning_rate": 6.14795918367347e-05, - "loss": 0.9982, + "epoch": 12.27, + "learning_rate": 4.573863636363637e-05, + "loss": 0.3682, "step": 1086 }, { - "epoch": 21.99, - "learning_rate": 6.135204081632653e-05, - "loss": 1.0137, + "epoch": 12.28, + "learning_rate": 4.5596590909090913e-05, + "loss": 0.3865, "step": 1087 }, { - "epoch": 22.01, - "learning_rate": 6.122448979591838e-05, - "loss": 0.9331, + "epoch": 12.29, + "learning_rate": 4.545454545454546e-05, + "loss": 0.363, "step": 1088 }, { - "epoch": 22.03, - "learning_rate": 6.109693877551021e-05, - "loss": 0.8834, + "epoch": 12.3, + "learning_rate": 4.5312500000000004e-05, + "loss": 0.3727, "step": 1089 }, { - "epoch": 22.06, - "learning_rate": 6.0969387755102046e-05, - "loss": 0.9757, + "epoch": 12.31, + "learning_rate": 4.517045454545455e-05, + "loss": 0.3827, "step": 1090 }, { - "epoch": 22.08, - "learning_rate": 6.084183673469388e-05, - "loss": 0.9038, + "epoch": 12.33, + "learning_rate": 4.5028409090909094e-05, + "loss": 0.3658, "step": 1091 }, { - "epoch": 22.1, - "learning_rate": 6.0714285714285715e-05, - "loss": 0.9097, + "epoch": 12.34, + "learning_rate": 4.488636363636364e-05, + "loss": 0.3844, "step": 1092 }, { - "epoch": 22.12, - "learning_rate": 6.058673469387756e-05, - "loss": 0.8972, + "epoch": 12.35, + "learning_rate": 4.4744318181818184e-05, + "loss": 0.3731, "step": 1093 }, { - "epoch": 22.14, - "learning_rate": 6.045918367346939e-05, - "loss": 0.8825, + "epoch": 12.36, + "learning_rate": 4.460227272727273e-05, + "loss": 0.3767, "step": 1094 }, { - "epoch": 22.16, - "learning_rate": 6.0331632653061234e-05, - "loss": 0.9814, + "epoch": 12.37, + "learning_rate": 4.4460227272727274e-05, + "loss": 0.3751, "step": 1095 }, { - "epoch": 22.18, - "learning_rate": 6.0204081632653065e-05, - "loss": 0.9874, + "epoch": 12.38, + "learning_rate": 4.431818181818182e-05, + "loss": 0.3718, "step": 1096 }, { - "epoch": 22.2, - "learning_rate": 6.0076530612244896e-05, - "loss": 0.912, + "epoch": 12.39, + "learning_rate": 4.4176136363636364e-05, + "loss": 0.3833, "step": 1097 }, { - "epoch": 22.22, - "learning_rate": 5.994897959183674e-05, - "loss": 0.9206, + "epoch": 12.4, + "learning_rate": 4.4034090909090916e-05, + "loss": 0.3673, "step": 1098 }, { - "epoch": 22.24, - "learning_rate": 5.982142857142857e-05, - "loss": 0.9497, + "epoch": 12.42, + "learning_rate": 4.3892045454545454e-05, + "loss": 0.3799, "step": 1099 }, { - "epoch": 22.26, - "learning_rate": 5.9693877551020416e-05, - "loss": 0.9269, + "epoch": 12.43, + "learning_rate": 4.375e-05, + "loss": 0.3661, "step": 1100 }, { - "epoch": 22.28, - "learning_rate": 5.956632653061225e-05, - "loss": 0.9452, + "epoch": 12.44, + "learning_rate": 4.360795454545455e-05, + "loss": 0.3554, "step": 1101 }, { - "epoch": 22.3, - "learning_rate": 5.9438775510204084e-05, - "loss": 0.9548, + "epoch": 12.45, + "learning_rate": 4.346590909090909e-05, + "loss": 0.3787, "step": 1102 }, { - "epoch": 22.32, - "learning_rate": 5.931122448979592e-05, - "loss": 0.9689, + "epoch": 12.46, + "learning_rate": 4.3323863636363635e-05, + "loss": 0.3695, "step": 1103 }, { - "epoch": 22.34, - "learning_rate": 5.918367346938776e-05, - "loss": 0.9455, + "epoch": 12.47, + "learning_rate": 4.318181818181819e-05, + "loss": 0.3777, "step": 1104 }, { - "epoch": 22.36, - "learning_rate": 5.905612244897959e-05, - "loss": 0.9409, + "epoch": 12.48, + "learning_rate": 4.303977272727273e-05, + "loss": 0.3693, "step": 1105 }, { - "epoch": 22.38, - "learning_rate": 5.8928571428571435e-05, - "loss": 0.9093, + "epoch": 12.49, + "learning_rate": 4.289772727272727e-05, + "loss": 0.3731, "step": 1106 }, { - "epoch": 22.4, - "learning_rate": 5.8801020408163266e-05, - "loss": 0.921, + "epoch": 12.51, + "learning_rate": 4.275568181818182e-05, + "loss": 0.3659, "step": 1107 }, { - "epoch": 22.42, - "learning_rate": 5.867346938775511e-05, - "loss": 0.9368, + "epoch": 12.52, + "learning_rate": 4.261363636363637e-05, + "loss": 0.3689, "step": 1108 }, { - "epoch": 22.44, - "learning_rate": 5.854591836734694e-05, - "loss": 0.907, + "epoch": 12.53, + "learning_rate": 4.247159090909091e-05, + "loss": 0.3625, "step": 1109 }, { - "epoch": 22.46, - "learning_rate": 5.841836734693877e-05, - "loss": 0.9126, + "epoch": 12.54, + "learning_rate": 4.232954545454546e-05, + "loss": 0.3874, "step": 1110 }, { - "epoch": 22.48, - "learning_rate": 5.8290816326530616e-05, - "loss": 0.9161, + "epoch": 12.55, + "learning_rate": 4.21875e-05, + "loss": 0.3651, "step": 1111 }, { - "epoch": 22.5, - "learning_rate": 5.816326530612245e-05, - "loss": 0.9542, + "epoch": 12.56, + "learning_rate": 4.204545454545455e-05, + "loss": 0.3639, "step": 1112 }, { - "epoch": 22.52, - "learning_rate": 5.803571428571429e-05, - "loss": 0.9775, + "epoch": 12.57, + "learning_rate": 4.190340909090909e-05, + "loss": 0.378, "step": 1113 }, { - "epoch": 22.54, - "learning_rate": 5.790816326530612e-05, - "loss": 1.0006, + "epoch": 12.59, + "learning_rate": 4.176136363636364e-05, + "loss": 0.3726, "step": 1114 }, { - "epoch": 22.56, - "learning_rate": 5.778061224489796e-05, - "loss": 0.8965, + "epoch": 12.6, + "learning_rate": 4.161931818181818e-05, + "loss": 0.3732, "step": 1115 }, { - "epoch": 22.58, - "learning_rate": 5.7653061224489805e-05, - "loss": 0.944, + "epoch": 12.61, + "learning_rate": 4.1477272727272734e-05, + "loss": 0.3673, "step": 1116 }, { - "epoch": 22.6, - "learning_rate": 5.7525510204081636e-05, - "loss": 0.9162, + "epoch": 12.62, + "learning_rate": 4.133522727272727e-05, + "loss": 0.3566, "step": 1117 }, { - "epoch": 22.62, - "learning_rate": 5.739795918367348e-05, - "loss": 0.9325, + "epoch": 12.63, + "learning_rate": 4.119318181818182e-05, + "loss": 0.3757, "step": 1118 }, { - "epoch": 22.64, - "learning_rate": 5.727040816326531e-05, - "loss": 0.8998, + "epoch": 12.64, + "learning_rate": 4.105113636363637e-05, + "loss": 0.3739, "step": 1119 }, { - "epoch": 22.66, - "learning_rate": 5.714285714285714e-05, - "loss": 0.9362, + "epoch": 12.65, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.3768, "step": 1120 }, { - "epoch": 22.68, - "learning_rate": 5.7015306122448986e-05, - "loss": 0.9969, + "epoch": 12.66, + "learning_rate": 4.076704545454545e-05, + "loss": 0.3758, "step": 1121 }, { - "epoch": 22.7, - "learning_rate": 5.688775510204082e-05, - "loss": 0.9104, + "epoch": 12.68, + "learning_rate": 4.0625000000000005e-05, + "loss": 0.3655, "step": 1122 }, { - "epoch": 22.72, - "learning_rate": 5.676020408163265e-05, - "loss": 0.9746, + "epoch": 12.69, + "learning_rate": 4.048295454545455e-05, + "loss": 0.3673, "step": 1123 }, { - "epoch": 22.74, - "learning_rate": 5.663265306122449e-05, - "loss": 0.9821, + "epoch": 12.7, + "learning_rate": 4.034090909090909e-05, + "loss": 0.3683, "step": 1124 }, { - "epoch": 22.76, - "learning_rate": 5.650510204081633e-05, - "loss": 0.9526, + "epoch": 12.71, + "learning_rate": 4.019886363636364e-05, + "loss": 0.3569, "step": 1125 }, { - "epoch": 22.78, - "learning_rate": 5.637755102040817e-05, - "loss": 0.871, + "epoch": 12.72, + "learning_rate": 4.0056818181818185e-05, + "loss": 0.3741, "step": 1126 }, { - "epoch": 22.8, - "learning_rate": 5.6250000000000005e-05, - "loss": 0.9534, + "epoch": 12.73, + "learning_rate": 3.991477272727273e-05, + "loss": 0.3817, "step": 1127 }, { - "epoch": 22.82, - "learning_rate": 5.6122448979591836e-05, - "loss": 0.9616, + "epoch": 12.74, + "learning_rate": 3.9772727272727275e-05, + "loss": 0.3748, "step": 1128 }, { - "epoch": 22.84, - "learning_rate": 5.599489795918368e-05, - "loss": 0.9627, + "epoch": 12.75, + "learning_rate": 3.963068181818182e-05, + "loss": 0.3625, "step": 1129 }, { - "epoch": 22.86, - "learning_rate": 5.586734693877551e-05, - "loss": 0.9704, + "epoch": 12.77, + "learning_rate": 3.9488636363636366e-05, + "loss": 0.3656, "step": 1130 }, { - "epoch": 22.88, - "learning_rate": 5.5739795918367356e-05, - "loss": 0.9506, + "epoch": 12.78, + "learning_rate": 3.934659090909091e-05, + "loss": 0.3564, "step": 1131 }, { - "epoch": 22.9, - "learning_rate": 5.561224489795919e-05, - "loss": 0.9553, + "epoch": 12.79, + "learning_rate": 3.9204545454545456e-05, + "loss": 0.3737, "step": 1132 }, { - "epoch": 22.93, - "learning_rate": 5.548469387755102e-05, - "loss": 0.9294, + "epoch": 12.8, + "learning_rate": 3.90625e-05, + "loss": 0.3649, "step": 1133 }, { - "epoch": 22.95, - "learning_rate": 5.535714285714286e-05, - "loss": 0.8979, + "epoch": 12.81, + "learning_rate": 3.8920454545454546e-05, + "loss": 0.3728, "step": 1134 }, { - "epoch": 22.97, - "learning_rate": 5.522959183673469e-05, - "loss": 1.0004, + "epoch": 12.82, + "learning_rate": 3.877840909090909e-05, + "loss": 0.3865, "step": 1135 }, { - "epoch": 22.99, - "learning_rate": 5.510204081632653e-05, - "loss": 0.9821, + "epoch": 12.83, + "learning_rate": 3.8636363636363636e-05, + "loss": 0.3866, "step": 1136 }, { - "epoch": 23.01, - "learning_rate": 5.497448979591837e-05, - "loss": 0.9607, + "epoch": 12.85, + "learning_rate": 3.849431818181818e-05, + "loss": 0.3725, "step": 1137 }, { - "epoch": 23.03, - "learning_rate": 5.4846938775510206e-05, - "loss": 0.9757, + "epoch": 12.86, + "learning_rate": 3.835227272727273e-05, + "loss": 0.3662, "step": 1138 }, { - "epoch": 23.05, - "learning_rate": 5.471938775510205e-05, - "loss": 0.9096, + "epoch": 12.87, + "learning_rate": 3.821022727272727e-05, + "loss": 0.3742, "step": 1139 }, { - "epoch": 23.07, - "learning_rate": 5.459183673469388e-05, - "loss": 0.9144, + "epoch": 12.88, + "learning_rate": 3.8068181818181816e-05, + "loss": 0.3727, "step": 1140 }, { - "epoch": 23.09, - "learning_rate": 5.446428571428571e-05, - "loss": 0.8667, + "epoch": 12.89, + "learning_rate": 3.792613636363637e-05, + "loss": 0.3653, "step": 1141 }, { - "epoch": 23.11, - "learning_rate": 5.4336734693877556e-05, - "loss": 0.8993, + "epoch": 12.9, + "learning_rate": 3.778409090909091e-05, + "loss": 0.3631, "step": 1142 }, { - "epoch": 23.13, - "learning_rate": 5.420918367346939e-05, - "loss": 0.8964, + "epoch": 12.91, + "learning_rate": 3.764204545454545e-05, + "loss": 0.3674, "step": 1143 }, { - "epoch": 23.15, - "learning_rate": 5.408163265306123e-05, - "loss": 0.9173, + "epoch": 12.92, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.3598, "step": 1144 }, { - "epoch": 23.17, - "learning_rate": 5.395408163265306e-05, - "loss": 0.9019, + "epoch": 12.94, + "learning_rate": 3.735795454545455e-05, + "loss": 0.3697, "step": 1145 }, { - "epoch": 23.19, - "learning_rate": 5.382653061224489e-05, - "loss": 0.9303, + "epoch": 12.95, + "learning_rate": 3.721590909090909e-05, + "loss": 0.3639, "step": 1146 }, { - "epoch": 23.21, - "learning_rate": 5.369897959183674e-05, - "loss": 0.9268, + "epoch": 12.96, + "learning_rate": 3.707386363636364e-05, + "loss": 0.3597, "step": 1147 }, { - "epoch": 23.23, - "learning_rate": 5.3571428571428575e-05, - "loss": 0.8803, + "epoch": 12.97, + "learning_rate": 3.6931818181818184e-05, + "loss": 0.3815, "step": 1148 }, { - "epoch": 23.25, - "learning_rate": 5.344387755102041e-05, - "loss": 0.9197, + "epoch": 12.98, + "learning_rate": 3.678977272727273e-05, + "loss": 0.3477, "step": 1149 }, { - "epoch": 23.27, - "learning_rate": 5.331632653061225e-05, - "loss": 0.9204, + "epoch": 12.99, + "learning_rate": 3.6647727272727274e-05, + "loss": 0.3631, "step": 1150 }, { - "epoch": 23.29, - "learning_rate": 5.318877551020408e-05, - "loss": 0.8802, + "epoch": 13.0, + "learning_rate": 3.650568181818182e-05, + "loss": 0.3569, "step": 1151 }, { - "epoch": 23.31, - "learning_rate": 5.3061224489795926e-05, - "loss": 0.9044, + "epoch": 13.01, + "learning_rate": 3.6363636363636364e-05, + "loss": 0.3435, "step": 1152 }, { - "epoch": 23.33, - "learning_rate": 5.293367346938776e-05, - "loss": 0.8893, + "epoch": 13.03, + "learning_rate": 3.6221590909090916e-05, + "loss": 0.3504, "step": 1153 }, { - "epoch": 23.35, - "learning_rate": 5.280612244897959e-05, - "loss": 0.8928, + "epoch": 13.04, + "learning_rate": 3.6079545454545454e-05, + "loss": 0.3582, "step": 1154 }, { - "epoch": 23.37, - "learning_rate": 5.267857142857143e-05, - "loss": 0.9353, + "epoch": 13.05, + "learning_rate": 3.59375e-05, + "loss": 0.356, "step": 1155 }, { - "epoch": 23.39, - "learning_rate": 5.255102040816326e-05, - "loss": 0.9345, + "epoch": 13.06, + "learning_rate": 3.579545454545455e-05, + "loss": 0.3506, "step": 1156 }, { - "epoch": 23.41, - "learning_rate": 5.242346938775511e-05, - "loss": 0.9372, + "epoch": 13.07, + "learning_rate": 3.565340909090909e-05, + "loss": 0.3628, "step": 1157 }, { - "epoch": 23.43, - "learning_rate": 5.229591836734694e-05, - "loss": 0.9234, + "epoch": 13.08, + "learning_rate": 3.5511363636363635e-05, + "loss": 0.3494, "step": 1158 }, { - "epoch": 23.45, - "learning_rate": 5.2168367346938776e-05, - "loss": 0.9177, + "epoch": 13.09, + "learning_rate": 3.5369318181818186e-05, + "loss": 0.3653, "step": 1159 }, { - "epoch": 23.47, - "learning_rate": 5.2040816326530614e-05, - "loss": 0.8757, + "epoch": 13.11, + "learning_rate": 3.522727272727273e-05, + "loss": 0.3515, "step": 1160 }, { - "epoch": 23.49, - "learning_rate": 5.191326530612245e-05, - "loss": 0.9048, + "epoch": 13.12, + "learning_rate": 3.508522727272727e-05, + "loss": 0.3474, "step": 1161 }, { - "epoch": 23.51, - "learning_rate": 5.1785714285714296e-05, - "loss": 0.9248, + "epoch": 13.13, + "learning_rate": 3.494318181818182e-05, + "loss": 0.3469, "step": 1162 }, { - "epoch": 23.53, - "learning_rate": 5.1658163265306127e-05, - "loss": 0.9379, + "epoch": 13.14, + "learning_rate": 3.480113636363637e-05, + "loss": 0.3471, "step": 1163 }, { - "epoch": 23.55, - "learning_rate": 5.153061224489796e-05, - "loss": 0.8596, + "epoch": 13.15, + "learning_rate": 3.465909090909091e-05, + "loss": 0.355, "step": 1164 }, { - "epoch": 23.57, - "learning_rate": 5.14030612244898e-05, - "loss": 0.9751, + "epoch": 13.16, + "learning_rate": 3.451704545454546e-05, + "loss": 0.3532, "step": 1165 }, { - "epoch": 23.59, - "learning_rate": 5.127551020408163e-05, - "loss": 0.8842, + "epoch": 13.17, + "learning_rate": 3.4375e-05, + "loss": 0.3533, "step": 1166 }, { - "epoch": 23.61, - "learning_rate": 5.114795918367348e-05, - "loss": 0.8765, + "epoch": 13.18, + "learning_rate": 3.423295454545455e-05, + "loss": 0.3571, "step": 1167 }, { - "epoch": 23.63, - "learning_rate": 5.102040816326531e-05, - "loss": 0.8942, + "epoch": 13.2, + "learning_rate": 3.409090909090909e-05, + "loss": 0.3435, "step": 1168 }, { - "epoch": 23.65, - "learning_rate": 5.089285714285714e-05, - "loss": 0.938, + "epoch": 13.21, + "learning_rate": 3.394886363636364e-05, + "loss": 0.348, "step": 1169 }, { - "epoch": 23.67, - "learning_rate": 5.076530612244898e-05, - "loss": 0.8993, + "epoch": 13.22, + "learning_rate": 3.380681818181818e-05, + "loss": 0.3505, "step": 1170 }, { - "epoch": 23.69, - "learning_rate": 5.063775510204082e-05, - "loss": 0.9362, + "epoch": 13.23, + "learning_rate": 3.3664772727272734e-05, + "loss": 0.346, "step": 1171 }, { - "epoch": 23.71, - "learning_rate": 5.051020408163265e-05, - "loss": 0.9249, + "epoch": 13.24, + "learning_rate": 3.352272727272727e-05, + "loss": 0.3568, "step": 1172 }, { - "epoch": 23.73, - "learning_rate": 5.0382653061224496e-05, - "loss": 0.9055, + "epoch": 13.25, + "learning_rate": 3.338068181818182e-05, + "loss": 0.3548, "step": 1173 }, { - "epoch": 23.75, - "learning_rate": 5.025510204081633e-05, - "loss": 0.8967, + "epoch": 13.26, + "learning_rate": 3.323863636363637e-05, + "loss": 0.352, "step": 1174 }, { - "epoch": 23.77, - "learning_rate": 5.012755102040817e-05, - "loss": 0.8795, + "epoch": 13.27, + "learning_rate": 3.3096590909090915e-05, + "loss": 0.3561, "step": 1175 }, { - "epoch": 23.8, - "learning_rate": 5e-05, - "loss": 0.9452, + "epoch": 13.29, + "learning_rate": 3.295454545454545e-05, + "loss": 0.3424, "step": 1176 }, { - "epoch": 23.82, - "learning_rate": 4.987244897959184e-05, - "loss": 0.926, + "epoch": 13.3, + "learning_rate": 3.2812500000000005e-05, + "loss": 0.3453, "step": 1177 }, { - "epoch": 23.84, - "learning_rate": 4.974489795918368e-05, - "loss": 0.8948, + "epoch": 13.31, + "learning_rate": 3.267045454545455e-05, + "loss": 0.347, "step": 1178 }, { - "epoch": 23.86, - "learning_rate": 4.961734693877551e-05, - "loss": 0.8926, + "epoch": 13.32, + "learning_rate": 3.252840909090909e-05, + "loss": 0.3526, "step": 1179 }, { - "epoch": 23.88, - "learning_rate": 4.9489795918367346e-05, - "loss": 0.8949, + "epoch": 13.33, + "learning_rate": 3.238636363636364e-05, + "loss": 0.3634, "step": 1180 }, { - "epoch": 23.9, - "learning_rate": 4.9362244897959184e-05, - "loss": 0.9648, + "epoch": 13.34, + "learning_rate": 3.2244318181818185e-05, + "loss": 0.3591, "step": 1181 }, { - "epoch": 23.92, - "learning_rate": 4.923469387755102e-05, - "loss": 0.9599, + "epoch": 13.35, + "learning_rate": 3.210227272727273e-05, + "loss": 0.3449, "step": 1182 }, { - "epoch": 23.94, - "learning_rate": 4.910714285714286e-05, - "loss": 0.9603, + "epoch": 13.36, + "learning_rate": 3.1960227272727275e-05, + "loss": 0.3362, "step": 1183 }, { - "epoch": 23.96, - "learning_rate": 4.89795918367347e-05, - "loss": 0.9302, + "epoch": 13.38, + "learning_rate": 3.181818181818182e-05, + "loss": 0.3613, "step": 1184 }, { - "epoch": 23.98, - "learning_rate": 4.8852040816326534e-05, - "loss": 0.9261, + "epoch": 13.39, + "learning_rate": 3.1676136363636365e-05, + "loss": 0.3509, "step": 1185 }, { - "epoch": 24.0, - "learning_rate": 4.872448979591837e-05, - "loss": 0.9257, + "epoch": 13.4, + "learning_rate": 3.153409090909091e-05, + "loss": 0.3534, "step": 1186 }, { - "epoch": 24.02, - "learning_rate": 4.859693877551021e-05, - "loss": 0.8725, + "epoch": 13.41, + "learning_rate": 3.1392045454545456e-05, + "loss": 0.3452, "step": 1187 }, { - "epoch": 24.04, - "learning_rate": 4.846938775510204e-05, - "loss": 0.8486, + "epoch": 13.42, + "learning_rate": 3.125e-05, + "loss": 0.3659, "step": 1188 }, { - "epoch": 24.06, - "learning_rate": 4.834183673469388e-05, - "loss": 0.8457, + "epoch": 13.43, + "learning_rate": 3.1107954545454546e-05, + "loss": 0.3445, "step": 1189 }, { - "epoch": 24.08, - "learning_rate": 4.8214285714285716e-05, - "loss": 0.7848, + "epoch": 13.44, + "learning_rate": 3.096590909090909e-05, + "loss": 0.3536, "step": 1190 }, { - "epoch": 24.1, - "learning_rate": 4.8086734693877554e-05, - "loss": 0.8885, + "epoch": 13.46, + "learning_rate": 3.0823863636363636e-05, + "loss": 0.3588, "step": 1191 }, { - "epoch": 24.12, - "learning_rate": 4.795918367346939e-05, - "loss": 0.9099, + "epoch": 13.47, + "learning_rate": 3.068181818181818e-05, + "loss": 0.347, "step": 1192 }, { - "epoch": 24.14, - "learning_rate": 4.783163265306123e-05, - "loss": 0.9147, + "epoch": 13.48, + "learning_rate": 3.053977272727273e-05, + "loss": 0.3537, "step": 1193 }, { - "epoch": 24.16, - "learning_rate": 4.7704081632653066e-05, - "loss": 0.8781, + "epoch": 13.49, + "learning_rate": 3.039772727272727e-05, + "loss": 0.3516, "step": 1194 }, { - "epoch": 24.18, - "learning_rate": 4.7576530612244904e-05, - "loss": 0.8847, + "epoch": 13.5, + "learning_rate": 3.025568181818182e-05, + "loss": 0.3587, "step": 1195 }, { - "epoch": 24.2, - "learning_rate": 4.744897959183674e-05, - "loss": 0.9041, + "epoch": 13.51, + "learning_rate": 3.0113636363636365e-05, + "loss": 0.3494, "step": 1196 }, { - "epoch": 24.22, - "learning_rate": 4.732142857142857e-05, - "loss": 0.8639, + "epoch": 13.52, + "learning_rate": 2.9971590909090913e-05, + "loss": 0.3341, "step": 1197 }, { - "epoch": 24.24, - "learning_rate": 4.719387755102041e-05, - "loss": 0.8831, + "epoch": 13.53, + "learning_rate": 2.9829545454545455e-05, + "loss": 0.3563, "step": 1198 }, { - "epoch": 24.26, - "learning_rate": 4.706632653061225e-05, - "loss": 0.9063, + "epoch": 13.55, + "learning_rate": 2.96875e-05, + "loss": 0.3534, "step": 1199 }, { - "epoch": 24.28, - "learning_rate": 4.6938775510204086e-05, - "loss": 0.8753, + "epoch": 13.56, + "learning_rate": 2.954545454545455e-05, + "loss": 0.3632, "step": 1200 }, { - "epoch": 24.3, - "learning_rate": 4.6811224489795916e-05, - "loss": 0.8977, + "epoch": 13.57, + "learning_rate": 2.940340909090909e-05, + "loss": 0.3523, "step": 1201 }, { - "epoch": 24.32, - "learning_rate": 4.6683673469387754e-05, - "loss": 0.8729, + "epoch": 13.58, + "learning_rate": 2.9261363636363635e-05, + "loss": 0.3587, "step": 1202 }, { - "epoch": 24.34, - "learning_rate": 4.655612244897959e-05, - "loss": 0.898, + "epoch": 13.59, + "learning_rate": 2.9119318181818184e-05, + "loss": 0.3523, "step": 1203 }, { - "epoch": 24.36, - "learning_rate": 4.642857142857143e-05, - "loss": 0.8521, + "epoch": 13.6, + "learning_rate": 2.8977272727272732e-05, + "loss": 0.3418, "step": 1204 }, { - "epoch": 24.38, - "learning_rate": 4.630102040816327e-05, - "loss": 0.8563, + "epoch": 13.61, + "learning_rate": 2.883522727272727e-05, + "loss": 0.3515, "step": 1205 }, { - "epoch": 24.4, - "learning_rate": 4.6173469387755105e-05, - "loss": 0.8462, + "epoch": 13.62, + "learning_rate": 2.869318181818182e-05, + "loss": 0.3362, "step": 1206 }, { - "epoch": 24.42, - "learning_rate": 4.604591836734694e-05, - "loss": 0.8929, + "epoch": 13.64, + "learning_rate": 2.8551136363636367e-05, + "loss": 0.3393, "step": 1207 }, { - "epoch": 24.44, - "learning_rate": 4.591836734693878e-05, - "loss": 0.9154, + "epoch": 13.65, + "learning_rate": 2.8409090909090912e-05, + "loss": 0.3395, "step": 1208 }, { - "epoch": 24.46, - "learning_rate": 4.579081632653062e-05, - "loss": 0.811, + "epoch": 13.66, + "learning_rate": 2.8267045454545454e-05, + "loss": 0.3434, "step": 1209 }, { - "epoch": 24.48, - "learning_rate": 4.566326530612245e-05, - "loss": 0.8667, + "epoch": 13.67, + "learning_rate": 2.8125000000000003e-05, + "loss": 0.3536, "step": 1210 }, { - "epoch": 24.5, - "learning_rate": 4.5535714285714286e-05, - "loss": 0.9179, + "epoch": 13.68, + "learning_rate": 2.7982954545454548e-05, + "loss": 0.3482, "step": 1211 }, { - "epoch": 24.52, - "learning_rate": 4.5408163265306124e-05, - "loss": 0.8757, + "epoch": 13.69, + "learning_rate": 2.784090909090909e-05, + "loss": 0.3383, "step": 1212 }, { - "epoch": 24.54, - "learning_rate": 4.528061224489796e-05, - "loss": 0.8519, + "epoch": 13.7, + "learning_rate": 2.7698863636363638e-05, + "loss": 0.3582, "step": 1213 }, { - "epoch": 24.56, - "learning_rate": 4.515306122448979e-05, - "loss": 0.9335, + "epoch": 13.72, + "learning_rate": 2.7556818181818183e-05, + "loss": 0.3469, "step": 1214 }, { - "epoch": 24.58, - "learning_rate": 4.502551020408164e-05, - "loss": 0.8785, + "epoch": 13.73, + "learning_rate": 2.741477272727273e-05, + "loss": 0.351, "step": 1215 }, { - "epoch": 24.6, - "learning_rate": 4.4897959183673474e-05, - "loss": 0.9022, + "epoch": 13.74, + "learning_rate": 2.7272727272727273e-05, + "loss": 0.3461, "step": 1216 }, { - "epoch": 24.62, - "learning_rate": 4.477040816326531e-05, - "loss": 0.9532, + "epoch": 13.75, + "learning_rate": 2.7130681818181818e-05, + "loss": 0.3497, "step": 1217 }, { - "epoch": 24.64, - "learning_rate": 4.464285714285715e-05, - "loss": 0.8956, + "epoch": 13.76, + "learning_rate": 2.6988636363636367e-05, + "loss": 0.3503, "step": 1218 }, { - "epoch": 24.67, - "learning_rate": 4.451530612244898e-05, - "loss": 0.8739, + "epoch": 13.77, + "learning_rate": 2.6846590909090912e-05, + "loss": 0.3499, "step": 1219 }, { - "epoch": 24.69, - "learning_rate": 4.438775510204082e-05, - "loss": 0.9312, + "epoch": 13.78, + "learning_rate": 2.6704545454545453e-05, + "loss": 0.3529, "step": 1220 }, { - "epoch": 24.71, - "learning_rate": 4.4260204081632656e-05, - "loss": 0.8536, + "epoch": 13.79, + "learning_rate": 2.6562500000000002e-05, + "loss": 0.3516, "step": 1221 }, { - "epoch": 24.73, - "learning_rate": 4.4132653061224493e-05, - "loss": 0.8984, + "epoch": 13.81, + "learning_rate": 2.6420454545454547e-05, + "loss": 0.3457, "step": 1222 }, { - "epoch": 24.75, - "learning_rate": 4.4005102040816324e-05, - "loss": 0.8949, + "epoch": 13.82, + "learning_rate": 2.627840909090909e-05, + "loss": 0.3482, "step": 1223 }, { - "epoch": 24.77, - "learning_rate": 4.387755102040816e-05, - "loss": 0.9389, + "epoch": 13.83, + "learning_rate": 2.6136363636363637e-05, + "loss": 0.3458, "step": 1224 }, { - "epoch": 24.79, - "learning_rate": 4.375e-05, - "loss": 0.8703, + "epoch": 13.84, + "learning_rate": 2.5994318181818182e-05, + "loss": 0.3442, "step": 1225 }, { - "epoch": 24.81, - "learning_rate": 4.362244897959184e-05, - "loss": 0.9407, + "epoch": 13.85, + "learning_rate": 2.585227272727273e-05, + "loss": 0.3717, "step": 1226 }, { - "epoch": 24.83, - "learning_rate": 4.3494897959183675e-05, - "loss": 0.9016, + "epoch": 13.86, + "learning_rate": 2.5710227272727272e-05, + "loss": 0.3404, "step": 1227 }, { - "epoch": 24.85, - "learning_rate": 4.336734693877551e-05, - "loss": 0.9025, + "epoch": 13.87, + "learning_rate": 2.5568181818181817e-05, + "loss": 0.34, "step": 1228 }, { - "epoch": 24.87, - "learning_rate": 4.323979591836735e-05, - "loss": 0.9415, + "epoch": 13.88, + "learning_rate": 2.5426136363636366e-05, + "loss": 0.3615, "step": 1229 }, { - "epoch": 24.89, - "learning_rate": 4.311224489795919e-05, - "loss": 0.9146, + "epoch": 13.9, + "learning_rate": 2.5284090909090914e-05, + "loss": 0.3473, "step": 1230 }, { - "epoch": 24.91, - "learning_rate": 4.2984693877551025e-05, - "loss": 0.9144, + "epoch": 13.91, + "learning_rate": 2.5142045454545453e-05, + "loss": 0.3532, "step": 1231 }, { - "epoch": 24.93, - "learning_rate": 4.2857142857142856e-05, - "loss": 0.9138, + "epoch": 13.92, + "learning_rate": 2.5e-05, + "loss": 0.3524, "step": 1232 }, { - "epoch": 24.95, - "learning_rate": 4.2729591836734694e-05, - "loss": 0.9372, + "epoch": 13.93, + "learning_rate": 2.4857954545454546e-05, + "loss": 0.3681, "step": 1233 }, { - "epoch": 24.97, - "learning_rate": 4.260204081632653e-05, - "loss": 0.8701, + "epoch": 13.94, + "learning_rate": 2.471590909090909e-05, + "loss": 0.3432, "step": 1234 }, { - "epoch": 24.99, - "learning_rate": 4.247448979591837e-05, - "loss": 0.9278, + "epoch": 13.95, + "learning_rate": 2.4573863636363636e-05, + "loss": 0.3507, "step": 1235 }, { - "epoch": 25.01, - "learning_rate": 4.234693877551021e-05, - "loss": 0.9157, + "epoch": 13.96, + "learning_rate": 2.4431818181818185e-05, + "loss": 0.3448, "step": 1236 }, { - "epoch": 25.03, - "learning_rate": 4.2219387755102045e-05, - "loss": 0.8852, + "epoch": 13.97, + "learning_rate": 2.4289772727272727e-05, + "loss": 0.3501, "step": 1237 }, { - "epoch": 25.05, - "learning_rate": 4.209183673469388e-05, - "loss": 0.855, + "epoch": 13.99, + "learning_rate": 2.4147727272727275e-05, + "loss": 0.3624, "step": 1238 }, { - "epoch": 25.07, - "learning_rate": 4.196428571428572e-05, - "loss": 0.8547, + "epoch": 14.0, + "learning_rate": 2.400568181818182e-05, + "loss": 0.3446, "step": 1239 }, { - "epoch": 25.09, - "learning_rate": 4.183673469387756e-05, - "loss": 0.8691, + "epoch": 14.01, + "learning_rate": 2.3863636363636365e-05, + "loss": 0.3463, "step": 1240 }, { - "epoch": 25.11, - "learning_rate": 4.170918367346939e-05, - "loss": 0.9101, + "epoch": 14.02, + "learning_rate": 2.372159090909091e-05, + "loss": 0.3492, "step": 1241 }, { - "epoch": 25.13, - "learning_rate": 4.1581632653061226e-05, - "loss": 0.8408, + "epoch": 14.03, + "learning_rate": 2.3579545454545455e-05, + "loss": 0.3352, "step": 1242 }, { - "epoch": 25.15, - "learning_rate": 4.1454081632653064e-05, - "loss": 0.9008, + "epoch": 14.04, + "learning_rate": 2.34375e-05, + "loss": 0.3368, "step": 1243 }, { - "epoch": 25.17, - "learning_rate": 4.13265306122449e-05, - "loss": 0.859, + "epoch": 14.05, + "learning_rate": 2.3295454545454546e-05, + "loss": 0.3295, "step": 1244 }, { - "epoch": 25.19, - "learning_rate": 4.119897959183674e-05, - "loss": 0.8525, + "epoch": 14.07, + "learning_rate": 2.315340909090909e-05, + "loss": 0.3426, "step": 1245 }, { - "epoch": 25.21, - "learning_rate": 4.107142857142857e-05, - "loss": 0.8682, + "epoch": 14.08, + "learning_rate": 2.3011363636363636e-05, + "loss": 0.3284, "step": 1246 }, { - "epoch": 25.23, - "learning_rate": 4.094387755102041e-05, - "loss": 0.8426, + "epoch": 14.09, + "learning_rate": 2.2869318181818184e-05, + "loss": 0.3305, "step": 1247 }, { - "epoch": 25.25, - "learning_rate": 4.0816326530612245e-05, - "loss": 0.8948, + "epoch": 14.1, + "learning_rate": 2.272727272727273e-05, + "loss": 0.3447, "step": 1248 }, { - "epoch": 25.27, - "learning_rate": 4.068877551020408e-05, - "loss": 0.8333, + "epoch": 14.11, + "learning_rate": 2.2585227272727274e-05, + "loss": 0.344, "step": 1249 }, { - "epoch": 25.29, - "learning_rate": 4.056122448979592e-05, - "loss": 0.87, + "epoch": 14.12, + "learning_rate": 2.244318181818182e-05, + "loss": 0.3516, "step": 1250 }, { - "epoch": 25.31, - "learning_rate": 4.043367346938776e-05, - "loss": 0.8215, + "epoch": 14.13, + "learning_rate": 2.2301136363636365e-05, + "loss": 0.3417, "step": 1251 }, { - "epoch": 25.33, - "learning_rate": 4.0306122448979596e-05, - "loss": 0.862, + "epoch": 14.14, + "learning_rate": 2.215909090909091e-05, + "loss": 0.3353, "step": 1252 }, { - "epoch": 25.35, - "learning_rate": 4.017857142857143e-05, - "loss": 0.8607, + "epoch": 14.16, + "learning_rate": 2.2017045454545458e-05, + "loss": 0.3363, "step": 1253 }, { - "epoch": 25.37, - "learning_rate": 4.0051020408163264e-05, - "loss": 0.8218, + "epoch": 14.17, + "learning_rate": 2.1875e-05, + "loss": 0.3342, "step": 1254 }, { - "epoch": 25.39, - "learning_rate": 3.99234693877551e-05, - "loss": 0.806, + "epoch": 14.18, + "learning_rate": 2.1732954545454545e-05, + "loss": 0.3366, "step": 1255 }, { - "epoch": 25.41, - "learning_rate": 3.979591836734694e-05, - "loss": 0.8929, + "epoch": 14.19, + "learning_rate": 2.1590909090909093e-05, + "loss": 0.3333, "step": 1256 }, { - "epoch": 25.43, - "learning_rate": 3.966836734693878e-05, - "loss": 0.8551, + "epoch": 14.2, + "learning_rate": 2.1448863636363635e-05, + "loss": 0.3425, "step": 1257 }, { - "epoch": 25.45, - "learning_rate": 3.9540816326530615e-05, - "loss": 0.8408, + "epoch": 14.21, + "learning_rate": 2.1306818181818183e-05, + "loss": 0.3366, "step": 1258 }, { - "epoch": 25.47, - "learning_rate": 3.9413265306122446e-05, - "loss": 0.8819, + "epoch": 14.22, + "learning_rate": 2.116477272727273e-05, + "loss": 0.3324, "step": 1259 }, { - "epoch": 25.49, - "learning_rate": 3.928571428571429e-05, - "loss": 0.8757, + "epoch": 14.23, + "learning_rate": 2.1022727272727274e-05, + "loss": 0.3527, "step": 1260 }, { - "epoch": 25.52, - "learning_rate": 3.915816326530613e-05, - "loss": 0.8778, + "epoch": 14.25, + "learning_rate": 2.088068181818182e-05, + "loss": 0.3399, "step": 1261 }, { - "epoch": 25.54, - "learning_rate": 3.9030612244897965e-05, - "loss": 0.8524, + "epoch": 14.26, + "learning_rate": 2.0738636363636367e-05, + "loss": 0.3651, "step": 1262 }, { - "epoch": 25.56, - "learning_rate": 3.8903061224489796e-05, - "loss": 0.846, + "epoch": 14.27, + "learning_rate": 2.059659090909091e-05, + "loss": 0.3381, "step": 1263 }, { - "epoch": 25.58, - "learning_rate": 3.8775510204081634e-05, - "loss": 0.8757, + "epoch": 14.28, + "learning_rate": 2.0454545454545457e-05, + "loss": 0.3408, "step": 1264 }, { - "epoch": 25.6, - "learning_rate": 3.864795918367347e-05, - "loss": 0.9084, + "epoch": 14.29, + "learning_rate": 2.0312500000000002e-05, + "loss": 0.3237, "step": 1265 }, { - "epoch": 25.62, - "learning_rate": 3.852040816326531e-05, - "loss": 0.8826, + "epoch": 14.3, + "learning_rate": 2.0170454545454544e-05, + "loss": 0.3421, "step": 1266 }, { - "epoch": 25.64, - "learning_rate": 3.839285714285715e-05, - "loss": 0.8619, + "epoch": 14.31, + "learning_rate": 2.0028409090909093e-05, + "loss": 0.3361, "step": 1267 }, { - "epoch": 25.66, - "learning_rate": 3.826530612244898e-05, - "loss": 0.8942, + "epoch": 14.33, + "learning_rate": 1.9886363636363638e-05, + "loss": 0.3475, "step": 1268 }, { - "epoch": 25.68, - "learning_rate": 3.8137755102040815e-05, - "loss": 0.8342, + "epoch": 14.34, + "learning_rate": 1.9744318181818183e-05, + "loss": 0.3337, "step": 1269 }, { - "epoch": 25.7, - "learning_rate": 3.801020408163265e-05, - "loss": 0.8512, + "epoch": 14.35, + "learning_rate": 1.9602272727272728e-05, + "loss": 0.3384, "step": 1270 }, { - "epoch": 25.72, - "learning_rate": 3.788265306122449e-05, - "loss": 0.8393, + "epoch": 14.36, + "learning_rate": 1.9460227272727273e-05, + "loss": 0.3255, "step": 1271 }, { - "epoch": 25.74, - "learning_rate": 3.775510204081633e-05, - "loss": 0.8508, + "epoch": 14.37, + "learning_rate": 1.9318181818181818e-05, + "loss": 0.3568, "step": 1272 }, { - "epoch": 25.76, - "learning_rate": 3.7627551020408166e-05, - "loss": 0.9094, + "epoch": 14.38, + "learning_rate": 1.9176136363636366e-05, + "loss": 0.3427, "step": 1273 }, { - "epoch": 25.78, - "learning_rate": 3.7500000000000003e-05, - "loss": 0.9175, + "epoch": 14.39, + "learning_rate": 1.9034090909090908e-05, + "loss": 0.3468, "step": 1274 }, { - "epoch": 25.8, - "learning_rate": 3.737244897959184e-05, - "loss": 0.9179, + "epoch": 14.4, + "learning_rate": 1.8892045454545457e-05, + "loss": 0.3312, "step": 1275 }, { - "epoch": 25.82, - "learning_rate": 3.724489795918368e-05, - "loss": 0.869, + "epoch": 14.42, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.3376, "step": 1276 }, { - "epoch": 25.84, - "learning_rate": 3.711734693877551e-05, - "loss": 0.8568, + "epoch": 14.43, + "learning_rate": 1.8607954545454543e-05, + "loss": 0.3284, "step": 1277 }, { - "epoch": 25.86, - "learning_rate": 3.698979591836735e-05, - "loss": 0.9104, + "epoch": 14.44, + "learning_rate": 1.8465909090909092e-05, + "loss": 0.3378, "step": 1278 }, { - "epoch": 25.88, - "learning_rate": 3.6862244897959185e-05, - "loss": 0.8912, + "epoch": 14.45, + "learning_rate": 1.8323863636363637e-05, + "loss": 0.3331, "step": 1279 }, { - "epoch": 25.9, - "learning_rate": 3.673469387755102e-05, - "loss": 0.878, + "epoch": 14.46, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.3297, "step": 1280 }, { - "epoch": 25.92, - "learning_rate": 3.6607142857142853e-05, - "loss": 0.8711, + "epoch": 14.47, + "learning_rate": 1.8039772727272727e-05, + "loss": 0.3491, "step": 1281 }, { - "epoch": 25.94, - "learning_rate": 3.64795918367347e-05, - "loss": 0.9404, + "epoch": 14.48, + "learning_rate": 1.7897727272727276e-05, + "loss": 0.3354, "step": 1282 }, { - "epoch": 25.96, - "learning_rate": 3.6352040816326536e-05, - "loss": 0.9302, + "epoch": 14.49, + "learning_rate": 1.7755681818181817e-05, + "loss": 0.3292, "step": 1283 }, { - "epoch": 25.98, - "learning_rate": 3.622448979591837e-05, - "loss": 0.8907, + "epoch": 14.51, + "learning_rate": 1.7613636363636366e-05, + "loss": 0.3419, "step": 1284 }, { - "epoch": 26.0, - "learning_rate": 3.609693877551021e-05, - "loss": 0.8473, + "epoch": 14.52, + "learning_rate": 1.747159090909091e-05, + "loss": 0.329, "step": 1285 }, { - "epoch": 26.02, - "learning_rate": 3.596938775510204e-05, - "loss": 0.8482, + "epoch": 14.53, + "learning_rate": 1.7329545454545456e-05, + "loss": 0.345, "step": 1286 }, { - "epoch": 26.04, - "learning_rate": 3.584183673469388e-05, - "loss": 0.8683, + "epoch": 14.54, + "learning_rate": 1.71875e-05, + "loss": 0.3369, "step": 1287 }, { - "epoch": 26.06, - "learning_rate": 3.571428571428572e-05, - "loss": 0.8443, + "epoch": 14.55, + "learning_rate": 1.7045454545454546e-05, + "loss": 0.3537, "step": 1288 }, { - "epoch": 26.08, - "learning_rate": 3.5586734693877555e-05, - "loss": 0.8462, + "epoch": 14.56, + "learning_rate": 1.690340909090909e-05, + "loss": 0.3418, "step": 1289 }, { - "epoch": 26.1, - "learning_rate": 3.5459183673469385e-05, - "loss": 0.8204, + "epoch": 14.57, + "learning_rate": 1.6761363636363636e-05, + "loss": 0.3514, "step": 1290 }, { - "epoch": 26.12, - "learning_rate": 3.533163265306122e-05, - "loss": 0.8632, + "epoch": 14.58, + "learning_rate": 1.6619318181818185e-05, + "loss": 0.3325, "step": 1291 }, { - "epoch": 26.14, - "learning_rate": 3.520408163265306e-05, - "loss": 0.8883, + "epoch": 14.6, + "learning_rate": 1.6477272727272726e-05, + "loss": 0.3492, "step": 1292 }, { - "epoch": 26.16, - "learning_rate": 3.50765306122449e-05, - "loss": 0.8369, + "epoch": 14.61, + "learning_rate": 1.6335227272727275e-05, + "loss": 0.3439, "step": 1293 }, { - "epoch": 26.18, - "learning_rate": 3.4948979591836736e-05, - "loss": 0.8369, + "epoch": 14.62, + "learning_rate": 1.619318181818182e-05, + "loss": 0.3401, "step": 1294 }, { - "epoch": 26.2, - "learning_rate": 3.4821428571428574e-05, - "loss": 0.8506, + "epoch": 14.63, + "learning_rate": 1.6051136363636365e-05, + "loss": 0.3373, "step": 1295 }, { - "epoch": 26.22, - "learning_rate": 3.469387755102041e-05, - "loss": 0.839, + "epoch": 14.64, + "learning_rate": 1.590909090909091e-05, + "loss": 0.339, "step": 1296 }, { - "epoch": 26.24, - "learning_rate": 3.456632653061225e-05, - "loss": 0.8421, + "epoch": 14.65, + "learning_rate": 1.5767045454545455e-05, + "loss": 0.3229, "step": 1297 }, { - "epoch": 26.26, - "learning_rate": 3.443877551020409e-05, - "loss": 0.8292, + "epoch": 14.66, + "learning_rate": 1.5625e-05, + "loss": 0.3369, "step": 1298 }, { - "epoch": 26.28, - "learning_rate": 3.431122448979592e-05, - "loss": 0.8412, + "epoch": 14.68, + "learning_rate": 1.5482954545454545e-05, + "loss": 0.3295, "step": 1299 }, { - "epoch": 26.3, - "learning_rate": 3.4183673469387755e-05, - "loss": 0.8305, + "epoch": 14.69, + "learning_rate": 1.534090909090909e-05, + "loss": 0.3333, "step": 1300 }, { - "epoch": 26.32, - "learning_rate": 3.405612244897959e-05, - "loss": 0.8181, + "epoch": 14.7, + "learning_rate": 1.5198863636363636e-05, + "loss": 0.3455, "step": 1301 }, { - "epoch": 26.34, - "learning_rate": 3.392857142857143e-05, - "loss": 0.8588, + "epoch": 14.71, + "learning_rate": 1.5056818181818182e-05, + "loss": 0.3331, "step": 1302 }, { - "epoch": 26.36, - "learning_rate": 3.380102040816326e-05, - "loss": 0.8528, + "epoch": 14.72, + "learning_rate": 1.4914772727272727e-05, + "loss": 0.3281, "step": 1303 }, { - "epoch": 26.39, - "learning_rate": 3.36734693877551e-05, - "loss": 0.9055, + "epoch": 14.73, + "learning_rate": 1.4772727272727274e-05, + "loss": 0.3222, "step": 1304 }, { - "epoch": 26.41, - "learning_rate": 3.354591836734694e-05, - "loss": 0.8762, + "epoch": 14.74, + "learning_rate": 1.4630681818181818e-05, + "loss": 0.3265, "step": 1305 }, { - "epoch": 26.43, - "learning_rate": 3.341836734693878e-05, - "loss": 0.8507, + "epoch": 14.75, + "learning_rate": 1.4488636363636366e-05, + "loss": 0.3312, "step": 1306 }, { - "epoch": 26.45, - "learning_rate": 3.329081632653062e-05, - "loss": 0.8541, + "epoch": 14.77, + "learning_rate": 1.434659090909091e-05, + "loss": 0.3406, "step": 1307 }, { - "epoch": 26.47, - "learning_rate": 3.316326530612245e-05, - "loss": 0.8205, + "epoch": 14.78, + "learning_rate": 1.4204545454545456e-05, + "loss": 0.3404, "step": 1308 }, { - "epoch": 26.49, - "learning_rate": 3.303571428571429e-05, - "loss": 0.8133, + "epoch": 14.79, + "learning_rate": 1.4062500000000001e-05, + "loss": 0.3293, "step": 1309 }, { - "epoch": 26.51, - "learning_rate": 3.2908163265306125e-05, - "loss": 0.8854, + "epoch": 14.8, + "learning_rate": 1.3920454545454545e-05, + "loss": 0.3317, "step": 1310 }, { - "epoch": 26.53, - "learning_rate": 3.278061224489796e-05, - "loss": 0.9397, + "epoch": 14.81, + "learning_rate": 1.3778409090909091e-05, + "loss": 0.3334, "step": 1311 }, { - "epoch": 26.55, - "learning_rate": 3.265306122448979e-05, - "loss": 0.854, + "epoch": 14.82, + "learning_rate": 1.3636363636363637e-05, + "loss": 0.332, "step": 1312 }, { - "epoch": 26.57, - "learning_rate": 3.252551020408163e-05, - "loss": 0.8617, + "epoch": 14.83, + "learning_rate": 1.3494318181818183e-05, + "loss": 0.3313, "step": 1313 }, { - "epoch": 26.59, - "learning_rate": 3.239795918367347e-05, - "loss": 0.8739, + "epoch": 14.84, + "learning_rate": 1.3352272727272727e-05, + "loss": 0.3374, "step": 1314 }, { - "epoch": 26.61, - "learning_rate": 3.2270408163265306e-05, - "loss": 0.8139, + "epoch": 14.86, + "learning_rate": 1.3210227272727273e-05, + "loss": 0.3238, "step": 1315 }, { - "epoch": 26.63, - "learning_rate": 3.2142857142857144e-05, - "loss": 0.7575, + "epoch": 14.87, + "learning_rate": 1.3068181818181819e-05, + "loss": 0.3391, "step": 1316 }, { - "epoch": 26.65, - "learning_rate": 3.201530612244898e-05, - "loss": 0.846, + "epoch": 14.88, + "learning_rate": 1.2926136363636365e-05, + "loss": 0.3329, "step": 1317 }, { - "epoch": 26.67, - "learning_rate": 3.188775510204082e-05, - "loss": 0.8797, + "epoch": 14.89, + "learning_rate": 1.2784090909090909e-05, + "loss": 0.3312, "step": 1318 }, { - "epoch": 26.69, - "learning_rate": 3.176020408163266e-05, - "loss": 0.8525, + "epoch": 14.9, + "learning_rate": 1.2642045454545457e-05, + "loss": 0.3405, "step": 1319 }, { - "epoch": 26.71, - "learning_rate": 3.1632653061224494e-05, - "loss": 0.8276, + "epoch": 14.91, + "learning_rate": 1.25e-05, + "loss": 0.3253, "step": 1320 }, { - "epoch": 26.73, - "learning_rate": 3.1505102040816325e-05, - "loss": 0.8734, + "epoch": 14.92, + "learning_rate": 1.2357954545454546e-05, + "loss": 0.3335, "step": 1321 }, { - "epoch": 26.75, - "learning_rate": 3.137755102040816e-05, - "loss": 0.8663, + "epoch": 14.94, + "learning_rate": 1.2215909090909092e-05, + "loss": 0.34, "step": 1322 }, { - "epoch": 26.77, - "learning_rate": 3.125e-05, - "loss": 0.8354, + "epoch": 14.95, + "learning_rate": 1.2073863636363638e-05, + "loss": 0.3293, "step": 1323 }, { - "epoch": 26.79, - "learning_rate": 3.112244897959184e-05, - "loss": 0.8374, + "epoch": 14.96, + "learning_rate": 1.1931818181818183e-05, + "loss": 0.3315, "step": 1324 }, { - "epoch": 26.81, - "learning_rate": 3.0994897959183676e-05, - "loss": 0.9025, + "epoch": 14.97, + "learning_rate": 1.1789772727272728e-05, + "loss": 0.3338, "step": 1325 }, { - "epoch": 26.83, - "learning_rate": 3.086734693877551e-05, - "loss": 0.8618, + "epoch": 14.98, + "learning_rate": 1.1647727272727273e-05, + "loss": 0.3427, "step": 1326 }, { - "epoch": 26.85, - "learning_rate": 3.073979591836735e-05, - "loss": 0.8867, + "epoch": 14.99, + "learning_rate": 1.1505681818181818e-05, + "loss": 0.337, "step": 1327 }, { - "epoch": 26.87, - "learning_rate": 3.061224489795919e-05, - "loss": 0.7864, + "epoch": 15.0, + "learning_rate": 1.1363636363636365e-05, + "loss": 0.3317, "step": 1328 }, { - "epoch": 26.89, - "learning_rate": 3.0484693877551023e-05, - "loss": 0.7706, + "epoch": 15.01, + "learning_rate": 1.122159090909091e-05, + "loss": 0.3259, "step": 1329 }, { - "epoch": 26.91, - "learning_rate": 3.0357142857142857e-05, - "loss": 0.8677, + "epoch": 15.03, + "learning_rate": 1.1079545454545455e-05, + "loss": 0.3323, "step": 1330 }, { - "epoch": 26.93, - "learning_rate": 3.0229591836734695e-05, - "loss": 0.8619, + "epoch": 15.04, + "learning_rate": 1.09375e-05, + "loss": 0.3267, "step": 1331 }, { - "epoch": 26.95, - "learning_rate": 3.0102040816326533e-05, - "loss": 0.8487, + "epoch": 15.05, + "learning_rate": 1.0795454545454547e-05, + "loss": 0.3296, "step": 1332 }, { - "epoch": 26.97, - "learning_rate": 2.997448979591837e-05, - "loss": 0.8644, + "epoch": 15.06, + "learning_rate": 1.0653409090909092e-05, + "loss": 0.3169, "step": 1333 }, { - "epoch": 26.99, - "learning_rate": 2.9846938775510208e-05, - "loss": 0.8779, + "epoch": 15.07, + "learning_rate": 1.0511363636363637e-05, + "loss": 0.3228, "step": 1334 }, { - "epoch": 27.01, - "learning_rate": 2.9719387755102042e-05, - "loss": 0.8589, + "epoch": 15.08, + "learning_rate": 1.0369318181818184e-05, + "loss": 0.3382, "step": 1335 }, { - "epoch": 27.03, - "learning_rate": 2.959183673469388e-05, - "loss": 0.8214, + "epoch": 15.09, + "learning_rate": 1.0227272727272729e-05, + "loss": 0.3271, "step": 1336 }, { - "epoch": 27.05, - "learning_rate": 2.9464285714285718e-05, - "loss": 0.7907, + "epoch": 15.1, + "learning_rate": 1.0085227272727272e-05, + "loss": 0.3388, "step": 1337 }, { - "epoch": 27.07, - "learning_rate": 2.9336734693877555e-05, - "loss": 0.8493, + "epoch": 15.12, + "learning_rate": 9.943181818181819e-06, + "loss": 0.3242, "step": 1338 }, { - "epoch": 27.09, - "learning_rate": 2.9209183673469386e-05, - "loss": 0.8423, + "epoch": 15.13, + "learning_rate": 9.801136363636364e-06, + "loss": 0.3373, "step": 1339 }, { - "epoch": 27.11, - "learning_rate": 2.9081632653061224e-05, - "loss": 0.7737, + "epoch": 15.14, + "learning_rate": 9.659090909090909e-06, + "loss": 0.325, "step": 1340 }, { - "epoch": 27.13, - "learning_rate": 2.895408163265306e-05, - "loss": 0.792, + "epoch": 15.15, + "learning_rate": 9.517045454545454e-06, + "loss": 0.3304, "step": 1341 }, { - "epoch": 27.15, - "learning_rate": 2.8826530612244902e-05, - "loss": 0.8145, + "epoch": 15.16, + "learning_rate": 9.375000000000001e-06, + "loss": 0.3291, "step": 1342 }, { - "epoch": 27.17, - "learning_rate": 2.869897959183674e-05, - "loss": 0.8356, + "epoch": 15.17, + "learning_rate": 9.232954545454546e-06, + "loss": 0.322, "step": 1343 }, { - "epoch": 27.19, - "learning_rate": 2.857142857142857e-05, - "loss": 0.815, + "epoch": 15.18, + "learning_rate": 9.090909090909091e-06, + "loss": 0.3499, "step": 1344 }, { - "epoch": 27.21, - "learning_rate": 2.844387755102041e-05, - "loss": 0.7806, + "epoch": 15.2, + "learning_rate": 8.948863636363638e-06, + "loss": 0.3307, "step": 1345 }, { - "epoch": 27.23, - "learning_rate": 2.8316326530612246e-05, - "loss": 0.835, + "epoch": 15.21, + "learning_rate": 8.806818181818183e-06, + "loss": 0.3527, "step": 1346 }, { - "epoch": 27.26, - "learning_rate": 2.8188775510204084e-05, - "loss": 0.8514, + "epoch": 15.22, + "learning_rate": 8.664772727272728e-06, + "loss": 0.325, "step": 1347 }, { - "epoch": 27.28, - "learning_rate": 2.8061224489795918e-05, - "loss": 0.8251, + "epoch": 15.23, + "learning_rate": 8.522727272727273e-06, + "loss": 0.3353, "step": 1348 }, { - "epoch": 27.3, - "learning_rate": 2.7933673469387756e-05, - "loss": 0.8456, + "epoch": 15.24, + "learning_rate": 8.380681818181818e-06, + "loss": 0.3316, "step": 1349 }, { - "epoch": 27.32, - "learning_rate": 2.7806122448979593e-05, - "loss": 0.8925, + "epoch": 15.25, + "learning_rate": 8.238636363636363e-06, + "loss": 0.3347, "step": 1350 }, { - "epoch": 27.34, - "learning_rate": 2.767857142857143e-05, - "loss": 0.8284, + "epoch": 15.26, + "learning_rate": 8.09659090909091e-06, + "loss": 0.3275, "step": 1351 }, { - "epoch": 27.36, - "learning_rate": 2.7551020408163265e-05, - "loss": 0.8471, + "epoch": 15.27, + "learning_rate": 7.954545454545455e-06, + "loss": 0.3288, "step": 1352 }, { - "epoch": 27.38, - "learning_rate": 2.7423469387755103e-05, - "loss": 0.819, + "epoch": 15.29, + "learning_rate": 7.8125e-06, + "loss": 0.3293, "step": 1353 }, { - "epoch": 27.4, - "learning_rate": 2.729591836734694e-05, - "loss": 0.8474, + "epoch": 15.3, + "learning_rate": 7.670454545454545e-06, + "loss": 0.3324, "step": 1354 }, { - "epoch": 27.42, - "learning_rate": 2.7168367346938778e-05, - "loss": 0.8378, + "epoch": 15.31, + "learning_rate": 7.528409090909091e-06, + "loss": 0.3286, "step": 1355 }, { - "epoch": 27.44, - "learning_rate": 2.7040816326530616e-05, - "loss": 0.8383, + "epoch": 15.32, + "learning_rate": 7.386363636363637e-06, + "loss": 0.3355, "step": 1356 }, { - "epoch": 27.46, - "learning_rate": 2.6913265306122447e-05, - "loss": 0.8534, + "epoch": 15.33, + "learning_rate": 7.244318181818183e-06, + "loss": 0.3353, "step": 1357 }, { - "epoch": 27.48, - "learning_rate": 2.6785714285714288e-05, - "loss": 0.8243, + "epoch": 15.34, + "learning_rate": 7.102272727272728e-06, + "loss": 0.3291, "step": 1358 }, { - "epoch": 27.5, - "learning_rate": 2.6658163265306125e-05, - "loss": 0.8467, + "epoch": 15.35, + "learning_rate": 6.960227272727272e-06, + "loss": 0.3358, "step": 1359 }, { - "epoch": 27.52, - "learning_rate": 2.6530612244897963e-05, - "loss": 0.8503, + "epoch": 15.36, + "learning_rate": 6.818181818181818e-06, + "loss": 0.3289, "step": 1360 }, { - "epoch": 27.54, - "learning_rate": 2.6403061224489794e-05, - "loss": 0.7655, + "epoch": 15.38, + "learning_rate": 6.676136363636363e-06, + "loss": 0.3253, "step": 1361 }, { - "epoch": 27.56, - "learning_rate": 2.627551020408163e-05, - "loss": 0.854, + "epoch": 15.39, + "learning_rate": 6.534090909090909e-06, + "loss": 0.3172, "step": 1362 }, { - "epoch": 27.58, - "learning_rate": 2.614795918367347e-05, - "loss": 0.838, + "epoch": 15.4, + "learning_rate": 6.392045454545454e-06, + "loss": 0.3192, "step": 1363 }, { - "epoch": 27.6, - "learning_rate": 2.6020408163265307e-05, - "loss": 0.8275, + "epoch": 15.41, + "learning_rate": 6.25e-06, + "loss": 0.3228, "step": 1364 }, { - "epoch": 27.62, - "learning_rate": 2.5892857142857148e-05, - "loss": 0.8494, + "epoch": 15.42, + "learning_rate": 6.107954545454546e-06, + "loss": 0.3269, "step": 1365 }, { - "epoch": 27.64, - "learning_rate": 2.576530612244898e-05, - "loss": 0.842, + "epoch": 15.43, + "learning_rate": 5.965909090909091e-06, + "loss": 0.3222, "step": 1366 }, { - "epoch": 27.66, - "learning_rate": 2.5637755102040816e-05, - "loss": 0.8176, + "epoch": 15.44, + "learning_rate": 5.823863636363636e-06, + "loss": 0.3382, "step": 1367 }, { - "epoch": 27.68, - "learning_rate": 2.5510204081632654e-05, - "loss": 0.8301, + "epoch": 15.45, + "learning_rate": 5.681818181818182e-06, + "loss": 0.3259, "step": 1368 }, { - "epoch": 27.7, - "learning_rate": 2.538265306122449e-05, - "loss": 0.8182, + "epoch": 15.47, + "learning_rate": 5.539772727272727e-06, + "loss": 0.3187, "step": 1369 }, { - "epoch": 27.72, - "learning_rate": 2.5255102040816326e-05, - "loss": 0.8067, + "epoch": 15.48, + "learning_rate": 5.397727272727273e-06, + "loss": 0.3283, "step": 1370 }, { - "epoch": 27.74, - "learning_rate": 2.5127551020408164e-05, - "loss": 0.8322, + "epoch": 15.49, + "learning_rate": 5.255681818181818e-06, + "loss": 0.3197, "step": 1371 }, { - "epoch": 27.76, - "learning_rate": 2.5e-05, - "loss": 0.828, + "epoch": 15.5, + "learning_rate": 5.113636363636364e-06, + "loss": 0.3508, "step": 1372 }, { - "epoch": 27.78, - "learning_rate": 2.487244897959184e-05, - "loss": 0.8583, + "epoch": 15.51, + "learning_rate": 4.9715909090909094e-06, + "loss": 0.3429, "step": 1373 }, { - "epoch": 27.8, - "learning_rate": 2.4744897959183673e-05, - "loss": 0.8273, + "epoch": 15.52, + "learning_rate": 4.8295454545454545e-06, + "loss": 0.3444, "step": 1374 }, { - "epoch": 27.82, - "learning_rate": 2.461734693877551e-05, - "loss": 0.8292, + "epoch": 15.53, + "learning_rate": 4.6875000000000004e-06, + "loss": 0.3289, "step": 1375 }, { - "epoch": 27.84, - "learning_rate": 2.448979591836735e-05, - "loss": 0.9004, + "epoch": 15.55, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.3307, "step": 1376 }, { - "epoch": 27.86, - "learning_rate": 2.4362244897959186e-05, - "loss": 0.8589, + "epoch": 15.56, + "learning_rate": 4.4034090909090914e-06, + "loss": 0.3295, "step": 1377 }, { - "epoch": 27.88, - "learning_rate": 2.423469387755102e-05, - "loss": 0.8559, + "epoch": 15.57, + "learning_rate": 4.2613636363636365e-06, + "loss": 0.3241, "step": 1378 }, { - "epoch": 27.9, - "learning_rate": 2.4107142857142858e-05, - "loss": 0.8224, + "epoch": 15.58, + "learning_rate": 4.119318181818182e-06, + "loss": 0.3213, "step": 1379 }, { - "epoch": 27.92, - "learning_rate": 2.3979591836734696e-05, - "loss": 0.8438, + "epoch": 15.59, + "learning_rate": 3.9772727272727275e-06, + "loss": 0.3289, "step": 1380 }, { - "epoch": 27.94, - "learning_rate": 2.3852040816326533e-05, - "loss": 0.8267, + "epoch": 15.6, + "learning_rate": 3.835227272727273e-06, + "loss": 0.3272, "step": 1381 }, { - "epoch": 27.96, - "learning_rate": 2.372448979591837e-05, - "loss": 0.8472, + "epoch": 15.61, + "learning_rate": 3.6931818181818186e-06, + "loss": 0.3257, "step": 1382 }, { - "epoch": 27.98, - "learning_rate": 2.3596938775510205e-05, - "loss": 0.835, + "epoch": 15.62, + "learning_rate": 3.551136363636364e-06, + "loss": 0.3396, "step": 1383 }, { - "epoch": 28.0, - "learning_rate": 2.3469387755102043e-05, - "loss": 0.847, + "epoch": 15.64, + "learning_rate": 3.409090909090909e-06, + "loss": 0.3163, "step": 1384 }, { - "epoch": 28.02, - "learning_rate": 2.3341836734693877e-05, - "loss": 0.865, + "epoch": 15.65, + "learning_rate": 3.2670454545454546e-06, + "loss": 0.3268, "step": 1385 }, { - "epoch": 28.04, - "learning_rate": 2.3214285714285715e-05, - "loss": 0.807, + "epoch": 15.66, + "learning_rate": 3.125e-06, + "loss": 0.3203, "step": 1386 }, { - "epoch": 28.06, - "learning_rate": 2.3086734693877552e-05, - "loss": 0.8133, + "epoch": 15.67, + "learning_rate": 2.9829545454545457e-06, + "loss": 0.338, "step": 1387 }, { - "epoch": 28.08, - "learning_rate": 2.295918367346939e-05, - "loss": 0.8242, + "epoch": 15.68, + "learning_rate": 2.840909090909091e-06, + "loss": 0.325, "step": 1388 }, { - "epoch": 28.1, - "learning_rate": 2.2831632653061224e-05, - "loss": 0.8142, + "epoch": 15.69, + "learning_rate": 2.6988636363636367e-06, + "loss": 0.3236, "step": 1389 }, { - "epoch": 28.13, - "learning_rate": 2.2704081632653062e-05, - "loss": 0.7772, + "epoch": 15.7, + "learning_rate": 2.556818181818182e-06, + "loss": 0.3247, "step": 1390 }, { - "epoch": 28.15, - "learning_rate": 2.2576530612244896e-05, - "loss": 0.7885, + "epoch": 15.71, + "learning_rate": 2.4147727272727273e-06, + "loss": 0.3312, "step": 1391 }, { - "epoch": 28.17, - "learning_rate": 2.2448979591836737e-05, - "loss": 0.8096, + "epoch": 15.73, + "learning_rate": 2.2727272727272728e-06, + "loss": 0.3431, "step": 1392 }, { - "epoch": 28.19, - "learning_rate": 2.2321428571428575e-05, - "loss": 0.8497, + "epoch": 15.74, + "learning_rate": 2.1306818181818183e-06, + "loss": 0.3205, "step": 1393 }, { - "epoch": 28.21, - "learning_rate": 2.219387755102041e-05, - "loss": 0.8814, + "epoch": 15.75, + "learning_rate": 1.9886363636363638e-06, + "loss": 0.3273, "step": 1394 }, { - "epoch": 28.23, - "learning_rate": 2.2066326530612247e-05, - "loss": 0.8634, + "epoch": 15.76, + "learning_rate": 1.8465909090909093e-06, + "loss": 0.3253, "step": 1395 }, { - "epoch": 28.25, - "learning_rate": 2.193877551020408e-05, - "loss": 0.8084, + "epoch": 15.77, + "learning_rate": 1.7045454545454546e-06, + "loss": 0.3279, "step": 1396 }, { - "epoch": 28.27, - "learning_rate": 2.181122448979592e-05, - "loss": 0.7792, + "epoch": 15.78, + "learning_rate": 1.5625e-06, + "loss": 0.3338, "step": 1397 }, { - "epoch": 28.29, - "learning_rate": 2.1683673469387756e-05, - "loss": 0.7998, + "epoch": 15.79, + "learning_rate": 1.4204545454545456e-06, + "loss": 0.3292, "step": 1398 }, { - "epoch": 28.31, - "learning_rate": 2.1556122448979594e-05, - "loss": 0.7963, + "epoch": 15.81, + "learning_rate": 1.278409090909091e-06, + "loss": 0.324, "step": 1399 }, { - "epoch": 28.33, - "learning_rate": 2.1428571428571428e-05, - "loss": 0.8058, + "epoch": 15.82, + "learning_rate": 1.1363636363636364e-06, + "loss": 0.31, "step": 1400 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 2.0726121875201434e+18, + "total_flos": 1.9109229260683162e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-1400/training_args.bin b/checkpoint-1400/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-1400/training_args.bin +++ b/checkpoint-1400/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..08371015f02382e6fcba318f4aaea54ae52cd3c4 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,34 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: True +- load_in_4bit: False +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: fp4 +- bnb_4bit_use_double_quant: False +- bnb_4bit_compute_dtype: float32 + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: True +- load_in_4bit: False +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: fp4 +- bnb_4bit_use_double_quant: False +- bnb_4bit_compute_dtype: float32 +### Framework versions + +- PEFT 0.6.0.dev0 + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..751d838ac0c1ae5ca71ca448b25d7a8a0173f01b --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,23 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "bigscience/bloomz-3b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.0, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "dense_4h_to_h", + "dense", + "dense_h_to_4h", + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.bin b/checkpoint-500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..29fa2903b59a1caaeb699cae9c531c16898a763c --- /dev/null +++ b/checkpoint-500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297ef8417e7df986ce834d9b0c8ebd28197873ea409686414f04e91b74281978 +size 39409357 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e94c574163d8d7559e98a1529bd1fb5f3f661fc --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8afe492977b67949412a751b16cf3c9e70d22ef10182d710a235c40cb1a4ac +size 78844421 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..52c3c528216530c307d1385ed9ffbc3fc18fe283 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6162bb9db25c89c41e126a7a00a5d0695219447bff9b18d08731531620758440 +size 14575 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf19db9ba23ce9daa53e3253bf2841a2fd659a36 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5434e83c78ab052927972350ed56fbd84392b488b1d09a11bdb87201790659f +size 627 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..fdafe480f024ff444c7492147536765ce5d55a2d --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..dbf002cafbd4818dcff2abc9156c088d681b4533 --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a208233d2ee8d8c83b23bc214df737c44806a1919f444e89b31e586cd956ba +size 14500471 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b56cc9c2965c07132c35df3e2972e93d98c82c3 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,10 @@ +{ + "add_prefix_space": false, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "tokenizer_class": "BloomTokenizer", + "unk_token": "" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9a1fb94e52a3f74ba72094e87380c24b49a9bf44 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,3019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.648720211827008, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, + "step": 1 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, + "step": 2 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, + "step": 3 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, + "step": 4 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, + "step": 5 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, + "step": 6 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, + "step": 7 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, + "step": 8 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, + "step": 9 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, + "step": 10 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, + "step": 11 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, + "step": 12 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, + "step": 13 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, + "step": 14 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, + "step": 15 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, + "step": 16 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, + "step": 17 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, + "step": 18 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, + "step": 19 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, + "step": 20 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, + "step": 21 + }, + { + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, + "step": 22 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, + "step": 23 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, + "step": 24 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, + "step": 25 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, + "step": 26 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, + "step": 27 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, + "step": 28 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, + "step": 29 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, + "step": 30 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, + "step": 31 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, + "step": 32 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, + "step": 33 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, + "step": 34 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, + "step": 35 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, + "step": 36 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, + "step": 37 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, + "step": 38 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, + "step": 39 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, + "step": 40 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, + "step": 41 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, + "step": 42 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, + "step": 43 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, + "step": 44 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, + "step": 45 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, + "step": 46 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, + "step": 47 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, + "step": 48 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, + "step": 49 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, + "step": 50 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, + "step": 51 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, + "step": 52 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, + "step": 53 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, + "step": 54 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, + "step": 55 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, + "step": 56 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, + "step": 57 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, + "step": 58 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, + "step": 59 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, + "step": 60 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, + "step": 61 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, + "step": 62 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, + "step": 63 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, + "step": 64 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, + "step": 65 + }, + { + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, + "step": 66 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, + "step": 67 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, + "step": 68 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, + "step": 69 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, + "step": 70 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, + "step": 71 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, + "step": 72 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, + "step": 73 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, + "step": 74 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, + "step": 75 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, + "step": 76 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, + "step": 77 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, + "step": 78 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, + "step": 79 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, + "step": 80 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, + "step": 81 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, + "step": 82 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, + "step": 83 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, + "step": 84 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, + "step": 85 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, + "step": 86 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, + "step": 87 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, + "step": 88 + }, + { + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, + "step": 89 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, + "step": 90 + }, + { + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, + "step": 91 + }, + { + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, + "step": 92 + }, + { + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, + "step": 93 + }, + { + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, + "step": 94 + }, + { + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, + "step": 95 + }, + { + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, + "step": 96 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, + "step": 97 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, + "step": 98 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, + "step": 99 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, + "step": 100 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, + "step": 101 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, + "step": 102 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, + "step": 103 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, + "step": 104 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, + "step": 105 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, + "step": 106 + }, + { + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, + "step": 107 + }, + { + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, + "step": 108 + }, + { + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, + "step": 109 + }, + { + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, + "step": 110 + }, + { + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, + "step": 111 + }, + { + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, + "step": 112 + }, + { + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, + "step": 113 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, + "step": 114 + }, + { + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, + "step": 115 + }, + { + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, + "step": 116 + }, + { + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, + "step": 117 + }, + { + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, + "step": 118 + }, + { + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, + "step": 119 + }, + { + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, + "step": 120 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, + "step": 121 + }, + { + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, + "step": 122 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, + "step": 123 + }, + { + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, + "step": 124 + }, + { + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, + "step": 125 + }, + { + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, + "step": 126 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, + "step": 127 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, + "step": 128 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, + "step": 129 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, + "step": 130 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, + "step": 131 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, + "step": 132 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, + "step": 133 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, + "step": 134 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, + "step": 135 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, + "step": 136 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, + "step": 137 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, + "step": 138 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, + "step": 139 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, + "step": 140 + }, + { + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, + "step": 141 + }, + { + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, + "step": 142 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, + "step": 143 + }, + { + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, + "step": 144 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, + "step": 145 + }, + { + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, + "step": 146 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, + "step": 147 + }, + { + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, + "step": 148 + }, + { + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, + "step": 149 + }, + { + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, + "step": 150 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, + "step": 151 + }, + { + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, + "step": 152 + }, + { + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, + "step": 153 + }, + { + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, + "step": 154 + }, + { + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, + "step": 155 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, + "step": 156 + }, + { + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, + "step": 157 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, + "step": 158 + }, + { + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, + "step": 159 + }, + { + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, + "step": 160 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, + "step": 161 + }, + { + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, + "step": 162 + }, + { + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, + "step": 163 + }, + { + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, + "step": 164 + }, + { + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, + "step": 165 + }, + { + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, + "step": 166 + }, + { + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, + "step": 167 + }, + { + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, + "step": 168 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, + "step": 169 + }, + { + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, + "step": 170 + }, + { + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, + "step": 171 + }, + { + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, + "step": 172 + }, + { + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, + "step": 173 + }, + { + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, + "step": 174 + }, + { + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, + "step": 175 + }, + { + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, + "step": 176 + }, + { + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, + "step": 177 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, + "step": 178 + }, + { + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, + "step": 179 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, + "step": 180 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, + "step": 181 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, + "step": 182 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, + "step": 183 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, + "step": 184 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, + "step": 185 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, + "step": 186 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, + "step": 187 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, + "step": 188 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, + "step": 189 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, + "step": 190 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, + "step": 191 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, + "step": 192 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, + "step": 193 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, + "step": 194 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, + "step": 195 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, + "step": 196 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, + "step": 197 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, + "step": 198 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, + "step": 199 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, + "step": 200 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, + "step": 201 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, + "step": 202 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, + "step": 203 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, + "step": 204 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, + "step": 205 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, + "step": 206 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, + "step": 207 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, + "step": 208 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, + "step": 209 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, + "step": 210 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, + "step": 211 + }, + { + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, + "step": 212 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, + "step": 213 + }, + { + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, + "step": 214 + }, + { + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, + "step": 215 + }, + { + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, + "step": 216 + }, + { + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, + "step": 217 + }, + { + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, + "step": 218 + }, + { + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, + "step": 219 + }, + { + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, + "step": 220 + }, + { + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, + "step": 221 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, + "step": 222 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, + "step": 223 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, + "step": 224 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, + "step": 225 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, + "step": 226 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, + "step": 227 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, + "step": 228 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, + "step": 229 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, + "step": 230 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, + "step": 231 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, + "step": 232 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, + "step": 233 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, + "step": 234 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, + "step": 235 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, + "step": 236 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, + "step": 237 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, + "step": 238 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, + "step": 239 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, + "step": 240 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, + "step": 241 + }, + { + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, + "step": 242 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, + "step": 243 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, + "step": 244 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, + "step": 245 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, + "step": 246 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, + "step": 247 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, + "step": 248 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, + "step": 249 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, + "step": 250 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, + "step": 251 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, + "step": 252 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, + "step": 253 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, + "step": 254 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, + "step": 255 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, + "step": 256 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, + "step": 257 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, + "step": 258 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, + "step": 259 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, + "step": 260 + }, + { + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, + "step": 261 + }, + { + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, + "step": 262 + }, + { + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, + "step": 263 + }, + { + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, + "step": 264 + }, + { + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, + "step": 265 + }, + { + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, + "step": 266 + }, + { + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, + "step": 267 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, + "step": 268 + }, + { + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, + "step": 269 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, + "step": 270 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, + "step": 271 + }, + { + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, + "step": 272 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, + "step": 273 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, + "step": 274 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, + "step": 275 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, + "step": 276 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, + "step": 277 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, + "step": 278 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, + "step": 279 + }, + { + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, + "step": 280 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, + "step": 281 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, + "step": 282 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, + "step": 283 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, + "step": 284 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, + "step": 285 + }, + { + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, + "step": 286 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, + "step": 287 + }, + { + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, + "step": 288 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, + "step": 289 + }, + { + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, + "step": 290 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, + "step": 291 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, + "step": 292 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, + "step": 293 + }, + { + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, + "step": 294 + }, + { + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, + "step": 295 + }, + { + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, + "step": 296 + }, + { + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, + "step": 297 + }, + { + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, + "step": 298 + }, + { + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, + "step": 299 + }, + { + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, + "step": 300 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, + "step": 301 + }, + { + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, + "step": 302 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, + "step": 303 + }, + { + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, + "step": 304 + }, + { + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, + "step": 305 + }, + { + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, + "step": 306 + }, + { + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, + "step": 307 + }, + { + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, + "step": 308 + }, + { + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, + "step": 309 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, + "step": 310 + }, + { + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, + "step": 311 + }, + { + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, + "step": 312 + }, + { + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, + "step": 313 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, + "step": 314 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, + "step": 315 + }, + { + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, + "step": 316 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, + "step": 317 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, + "step": 318 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, + "step": 319 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, + "step": 320 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, + "step": 321 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, + "step": 322 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, + "step": 323 + }, + { + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, + "step": 324 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, + "step": 325 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, + "step": 326 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, + "step": 327 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, + "step": 328 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, + "step": 329 + }, + { + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, + "step": 330 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, + "step": 331 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, + "step": 332 + }, + { + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, + "step": 333 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, + "step": 334 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, + "step": 335 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, + "step": 336 + }, + { + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, + "step": 337 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, + "step": 338 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, + "step": 339 + }, + { + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, + "step": 340 + }, + { + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, + "step": 341 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, + "step": 342 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, + "step": 343 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, + "step": 344 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, + "step": 345 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, + "step": 346 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, + "step": 347 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, + "step": 348 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, + "step": 349 + }, + { + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, + "step": 350 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, + "step": 351 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, + "step": 352 + }, + { + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, + "step": 353 + }, + { + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, + "step": 354 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, + "step": 355 + }, + { + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, + "step": 356 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, + "step": 357 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, + "step": 358 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, + "step": 359 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, + "step": 360 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, + "step": 361 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, + "step": 362 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, + "step": 363 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, + "step": 364 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, + "step": 365 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, + "step": 366 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, + "step": 367 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, + "step": 368 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, + "step": 369 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, + "step": 370 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, + "step": 371 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, + "step": 372 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, + "step": 373 + }, + { + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, + "step": 374 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, + "step": 375 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, + "step": 376 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, + "step": 377 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, + "step": 378 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, + "step": 379 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, + "step": 380 + }, + { + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, + "step": 381 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, + "step": 382 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, + "step": 383 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, + "step": 384 + }, + { + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, + "step": 385 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, + "step": 386 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, + "step": 387 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, + "step": 388 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, + "step": 389 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, + "step": 390 + }, + { + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, + "step": 391 + }, + { + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, + "step": 392 + }, + { + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, + "step": 393 + }, + { + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, + "step": 394 + }, + { + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, + "step": 395 + }, + { + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, + "step": 396 + }, + { + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, + "step": 397 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, + "step": 398 + }, + { + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, + "step": 399 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, + "step": 400 + }, + { + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, + "step": 401 + }, + { + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, + "step": 402 + }, + { + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, + "step": 403 + }, + { + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, + "step": 404 + }, + { + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, + "step": 405 + }, + { + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, + "step": 406 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, + "step": 407 + }, + { + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, + "step": 408 + }, + { + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, + "step": 409 + }, + { + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, + "step": 410 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, + "step": 411 + }, + { + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, + "step": 412 + }, + { + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, + "step": 413 + }, + { + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, + "step": 414 + }, + { + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, + "step": 415 + }, + { + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, + "step": 416 + }, + { + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, + "step": 417 + }, + { + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, + "step": 418 + }, + { + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, + "step": 419 + }, + { + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, + "step": 420 + }, + { + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, + "step": 421 + }, + { + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, + "step": 422 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, + "step": 423 + }, + { + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, + "step": 424 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, + "step": 425 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, + "step": 426 + }, + { + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, + "step": 427 + }, + { + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, + "step": 428 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, + "step": 429 + }, + { + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, + "step": 430 + }, + { + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, + "step": 431 + }, + { + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, + "step": 432 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, + "step": 433 + }, + { + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, + "step": 434 + }, + { + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, + "step": 435 + }, + { + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, + "step": 436 + }, + { + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, + "step": 437 + }, + { + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, + "step": 438 + }, + { + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, + "step": 439 + }, + { + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, + "step": 440 + }, + { + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, + "step": 441 + }, + { + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, + "step": 442 + }, + { + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, + "step": 443 + }, + { + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, + "step": 444 + }, + { + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, + "step": 445 + }, + { + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, + "step": 446 + }, + { + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, + "step": 447 + }, + { + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, + "step": 448 + }, + { + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, + "step": 449 + }, + { + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, + "step": 450 + }, + { + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, + "step": 451 + }, + { + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, + "step": 452 + }, + { + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, + "step": 453 + }, + { + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, + "step": 454 + }, + { + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, + "step": 455 + }, + { + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, + "step": 456 + }, + { + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, + "step": 457 + }, + { + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, + "step": 458 + }, + { + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, + "step": 459 + }, + { + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, + "step": 460 + }, + { + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, + "step": 461 + }, + { + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, + "step": 462 + }, + { + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, + "step": 463 + }, + { + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, + "step": 464 + }, + { + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, + "step": 465 + }, + { + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, + "step": 466 + }, + { + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, + "step": 467 + }, + { + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, + "step": 468 + }, + { + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, + "step": 469 + }, + { + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, + "step": 470 + }, + { + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, + "step": 471 + }, + { + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, + "step": 472 + }, + { + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, + "step": 473 + }, + { + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, + "step": 474 + }, + { + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, + "step": 475 + }, + { + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, + "step": 476 + }, + { + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, + "step": 477 + }, + { + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, + "step": 478 + }, + { + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, + "step": 479 + }, + { + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, + "step": 480 + }, + { + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, + "step": 481 + }, + { + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, + "step": 482 + }, + { + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, + "step": 483 + }, + { + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, + "step": 484 + }, + { + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, + "step": 485 + }, + { + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, + "step": 486 + }, + { + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, + "step": 487 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, + "step": 488 + }, + { + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, + "step": 489 + }, + { + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, + "step": 490 + }, + { + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, + "step": 491 + }, + { + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, + "step": 492 + }, + { + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, + "step": 493 + }, + { + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, + "step": 494 + }, + { + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, + "step": 495 + }, + { + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, + "step": 496 + }, + { + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, + "step": 497 + }, + { + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, + "step": 498 + }, + { + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, + "step": 499 + }, + { + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, + "step": 500 + } + ], + "logging_steps": 1, + "max_steps": 1408, + "num_train_epochs": 16, + "save_steps": 100, + "total_flos": 6.820509352598323e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee7ddb867f05d9a969f71467a8eb88994865cf51 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 +size 4155 diff --git a/checkpoint-600/adapter_model.bin b/checkpoint-600/adapter_model.bin index 9187ebbdb23fe0566e9b1bc80bf61091168bec53..0e3b6260f833a8a28a216ce8e666b9d210229def 100644 --- a/checkpoint-600/adapter_model.bin +++ b/checkpoint-600/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bdafbf712a13caac2eff2baadedfa7d69fb1ba1a1618c0652bba7fda3e99d02 +oid sha256:646401c6da34042eb7268456ed4813b4e011d0c30427bf0cfec0315f0a6549d6 size 39409357 diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt index ff5066c6b6e3961344dee396ea0cf019fe2c8008..b4226db73d63627941c147462501ca003003e066 100644 --- a/checkpoint-600/optimizer.pt +++ b/checkpoint-600/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e98eee29adfb5b0c5dfcad4773c74d2234215ec17cf81e21ff40b5dcdd0980f +oid sha256:a5291a1a2741ff5eba566c03897395a429b391eef776c13e39cae513e36c8f0e size 78844421 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth index 961069ec281e351c01eb6735b97b75dd3d8d33b7..2d80ea84c5f73d441972e03ddda3329bcd15dedb 100644 --- a/checkpoint-600/rng_state.pth +++ b/checkpoint-600/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:696b4cef517e79a03215d5c5fccde0c73d2d962fe70ebab1e472f650136142b2 +oid sha256:1dd247f5d8a4bdaa46e9a22dea4b3ff7e3cc6bbd0eca5a0dcd56fe15ceba641a size 14575 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt index 85ac73474f178c78555b00fc82c696e29cc9f8ca..50a8b9b4add9e50970ea37329af2b0293ef923f0 100644 --- a/checkpoint-600/scheduler.pt +++ b/checkpoint-600/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6ec2d50f574aac2aa9116fabbb4b97fdb31897bc783b4e235f34f9907d573b9 +oid sha256:0b4fc3b8a57d0b742fbb1df682ae607287e3cc698015cd0bd91af21e4797b7f6 size 627 diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json index ceb031af3f1beb812ee9cf0893a1d35d18d1d2fc..5ee7ad0a9b4b24fcd3e7e047f14121061f9548ab 100644 --- a/checkpoint-600/trainer_state.json +++ b/checkpoint-600/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 12.140373063547266, + "epoch": 6.77846425419241, "eval_steps": 500, "global_step": 600, "is_hyper_param_search": false, @@ -9,3611 +9,3611 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 8.877519690112819e+17, + "total_flos": 8.186165472352358e+17, "trial_name": null, "trial_params": null } diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-600/training_args.bin +++ b/checkpoint-600/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-700/adapter_model.bin b/checkpoint-700/adapter_model.bin index 555b872534b3769fecafaf9320509b206eaa9ba2..47edcdade531f430a4841b8c75a8994cccc6886e 100644 --- a/checkpoint-700/adapter_model.bin +++ b/checkpoint-700/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fe79826e26fa903fa64560c79bd0221d35aa416cc9a8d5ec9e09e149435dfbb +oid sha256:83a2bd54ab68a3dcad8fe0e5a630e45f5342750c381fd29ab4100c5db3d4e0e9 size 39409357 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt index 6422dd33c8fe76b4f9fbf53da14138518675e40c..a62e3aec1fb00d3f76e73f1aefbfc3d9d39e10aa 100644 --- a/checkpoint-700/optimizer.pt +++ b/checkpoint-700/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:829988f0b71481d26c8b54a86768543c7c14a48d695552f6933481d5709420c1 +oid sha256:81ef498b8b8b31a2a5c36ffe3529e1378029eb4c3e4ba4770de0c248e4c62950 size 78844421 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth index c1c4c79fcb2148e5de373db02c3ee2987200a3a9..a0e229d77fa9bb9693355411d7e7ba7367c65223 100644 --- a/checkpoint-700/rng_state.pth +++ b/checkpoint-700/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e46ce4eb16240da9f3a8b3066acb6f59a234249ee2a3052f3323786da479838 +oid sha256:36fc71bd44bd7f04f2599c5dface64c517de1a7ab7bac3600f3f6470c6c72673 size 14575 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt index 4543a318cb958b4b426e7561985db7ab82a9d34b..5bf1dd76a9cb3ad1e89ec2bcd4d8085e0d94a8e9 100644 --- a/checkpoint-700/scheduler.pt +++ b/checkpoint-700/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:091c20f4c5e244fe8f3c4fd8851e0e20a9e6123652dcfb2277f8793845f1eb70 +oid sha256:7c6badaa6737fa9306a1f25b19c54f91672e90aa94d505b95ab467e7d08dd541 size 627 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json index 2aeb564b0e254822ffc5698a6594bddc49348229..c2d19f019ed3319a4afbcd2d2827df146950c4f2 100644 --- a/checkpoint-700/trainer_state.json +++ b/checkpoint-700/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 14.163768574138476, + "epoch": 7.908208296557811, "eval_steps": 500, "global_step": 700, "is_hyper_param_search": false, @@ -9,4211 +9,4211 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.0356683330832384e+18, + "total_flos": 9.550264840009421e+17, "trial_name": null, "trial_params": null } diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-700/training_args.bin +++ b/checkpoint-700/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-800/adapter_model.bin b/checkpoint-800/adapter_model.bin index cee32efffd1b20413bbd86ddf1c6b5e172aff0ad..5e2cf0fd2fd7c18345726dfa575faed0d4840e70 100644 --- a/checkpoint-800/adapter_model.bin +++ b/checkpoint-800/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1d0c9cf78a0287e75a067e9f589df56d88beac420ee2db9171f4dc9525fa2ca +oid sha256:c96e945f63003849e24de01bb42bf53589067a6378304f2f2bdbc259148446f7 size 39409357 diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt index b0d866f2df62fc478638a4259af93b239d1fe68d..d5c1d7d0df396b867ae4aad1ef5aaa141aa5f1b7 100644 --- a/checkpoint-800/optimizer.pt +++ b/checkpoint-800/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07dbe2f482aa556941cc3c2c9287c9031960f1c986eb88f6af216069cf2f4eca +oid sha256:37fa9f63942fee0e6bc2badebc69bd612f8a781c402497c088b4a2e4afca1e2f size 78844421 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth index b879e8b3e27b533241ce1d1d17eb9d40184e6556..929acb13fd6e1d91a9fa525251307f2db4c55db6 100644 --- a/checkpoint-800/rng_state.pth +++ b/checkpoint-800/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef5771dabc82868bc27d459c4a2a8532048b4914b817944e763d47579f3a8c81 +oid sha256:f0e0f30c7754637247f97149558697a4932915f961818f7360cfc62855116ca0 size 14575 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt index 472e441060b50c337f61e375e95c2d2202b3a99c..6ee10a9418fae9c93ed2ba1e1917c1f7886e9427 100644 --- a/checkpoint-800/scheduler.pt +++ b/checkpoint-800/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb093730edb340d1b1812e0604570255eef4ca2d6126dada512bcd3e775c93af +oid sha256:df029c6d7c683e322312c658d0b6a0e5e9499bad68e681717b9bb9d001e67907 size 627 diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json index 23776a4314c0fade06a35f724e76e246d7aa5dc8..748471aa026988124683f3b8b7ebd368407dc1cf 100644 --- a/checkpoint-800/trainer_state.json +++ b/checkpoint-800/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 16.187164084729687, + "epoch": 9.037952338923212, "eval_steps": 500, "global_step": 800, "is_hyper_param_search": false, @@ -9,4811 +9,4811 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.1836061421370368e+18, + "total_flos": 1.0915376160523162e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-800/training_args.bin +++ b/checkpoint-800/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/checkpoint-900/adapter_model.bin b/checkpoint-900/adapter_model.bin index db7ed4f6ae4a91e62162d01678a6701bba9864f2..e9b913951f0eafb85b209a393e5f742ac4bf6408 100644 --- a/checkpoint-900/adapter_model.bin +++ b/checkpoint-900/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f51c1d84f0d247f48d41fba2b15457feed1d404eeff6da6d13cabd701e815176 +oid sha256:ed6e90196f5a274b5d5ff0f18e648b6396fc99189dc82111fcfd2e83656a72f7 size 39409357 diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt index 24c7142a336fb1d38f0ad6b55a013afa82c328c9..1590a6e8ae4cbf800ea484118a40f6570d2e685f 100644 --- a/checkpoint-900/optimizer.pt +++ b/checkpoint-900/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:450feba545ce1622e78b86c9c73450f53cd2c861fd82ef80a705f3ff9afe3e5a +oid sha256:2fe640e1cc583a2fcc1f7be360da696c6de30f28f00d5f64bbb3f586eab33160 size 78844421 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth index 318fa43e6207ee82707af116de91e1ec428b2697..df08b4983b9f5bfcf57aaaceb89bc51479bfd216 100644 --- a/checkpoint-900/rng_state.pth +++ b/checkpoint-900/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18451c8480ecb75a9e33d086345a13c29ae0a8eae26b215a820f4299cc32f2a0 +oid sha256:ab62043be50b93d4eb28964be2d945176db3d64fe73ddd052a7656ba9141c683 size 14575 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt index 819e0686a4ea5e085317f0dd042805cc2503ad09..8310496ea86f52d12b55338abbc5ec74b78210b3 100644 --- a/checkpoint-900/scheduler.pt +++ b/checkpoint-900/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f7ed7c2c4b503d29d3bdb091842bdfbacee353eb03798fc384008a89c404484 +oid sha256:7b34852b759fa1b56e14c21698a89fa4737f1160151f73515f2ad9b2b1334acd size 627 diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json index 64e9788c0c92d579e2bac3e81f871d41896de5f2..8a9d005ceb7bbeefb60d1077adafe99907c2a0e3 100644 --- a/checkpoint-900/trainer_state.json +++ b/checkpoint-900/trainer_state.json @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 18.2105595953209, + "epoch": 10.167696381288614, "eval_steps": 500, "global_step": 900, "is_hyper_param_search": false, @@ -9,5411 +9,5411 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 0.00019987244897959184, - "loss": 3.2215, + "epoch": 0.01, + "learning_rate": 0.00019985795454545454, + "loss": 3.3254, "step": 1 }, { - "epoch": 0.04, - "learning_rate": 0.00019974489795918367, - "loss": 2.8365, + "epoch": 0.02, + "learning_rate": 0.0001997159090909091, + "loss": 3.1222, "step": 2 }, { - "epoch": 0.06, - "learning_rate": 0.00019961734693877553, - "loss": 2.602, + "epoch": 0.03, + "learning_rate": 0.00019957386363636366, + "loss": 2.9506, "step": 3 }, { - "epoch": 0.08, - "learning_rate": 0.00019948979591836736, - "loss": 2.4196, + "epoch": 0.05, + "learning_rate": 0.0001994318181818182, + "loss": 2.8459, "step": 4 }, { - "epoch": 0.1, - "learning_rate": 0.0001993622448979592, - "loss": 2.2574, + "epoch": 0.06, + "learning_rate": 0.00019928977272727275, + "loss": 2.7277, "step": 5 }, { - "epoch": 0.12, - "learning_rate": 0.00019923469387755102, - "loss": 2.2239, + "epoch": 0.07, + "learning_rate": 0.00019914772727272728, + "loss": 2.6184, "step": 6 }, { - "epoch": 0.14, - "learning_rate": 0.00019910714285714288, - "loss": 2.1661, + "epoch": 0.08, + "learning_rate": 0.0001990056818181818, + "loss": 2.5151, "step": 7 }, { - "epoch": 0.16, - "learning_rate": 0.0001989795918367347, - "loss": 2.0987, + "epoch": 0.09, + "learning_rate": 0.00019886363636363637, + "loss": 2.4234, "step": 8 }, { - "epoch": 0.18, - "learning_rate": 0.00019885204081632654, - "loss": 2.015, + "epoch": 0.1, + "learning_rate": 0.00019872159090909093, + "loss": 2.3795, "step": 9 }, { - "epoch": 0.2, - "learning_rate": 0.00019872448979591837, - "loss": 1.9771, + "epoch": 0.11, + "learning_rate": 0.00019857954545454546, + "loss": 2.3629, "step": 10 }, { - "epoch": 0.22, - "learning_rate": 0.00019859693877551023, - "loss": 2.0271, + "epoch": 0.12, + "learning_rate": 0.00019843750000000002, + "loss": 2.3246, "step": 11 }, { - "epoch": 0.24, - "learning_rate": 0.00019846938775510203, - "loss": 1.9812, + "epoch": 0.14, + "learning_rate": 0.00019829545454545455, + "loss": 2.2274, "step": 12 }, { - "epoch": 0.26, - "learning_rate": 0.0001983418367346939, - "loss": 2.0834, + "epoch": 0.15, + "learning_rate": 0.00019815340909090908, + "loss": 2.2545, "step": 13 }, { - "epoch": 0.28, - "learning_rate": 0.00019821428571428572, - "loss": 1.9174, + "epoch": 0.16, + "learning_rate": 0.00019801136363636367, + "loss": 2.2814, "step": 14 }, { - "epoch": 0.3, - "learning_rate": 0.00019808673469387755, - "loss": 1.8409, + "epoch": 0.17, + "learning_rate": 0.0001978693181818182, + "loss": 2.2004, "step": 15 }, { - "epoch": 0.32, - "learning_rate": 0.00019795918367346938, - "loss": 1.929, + "epoch": 0.18, + "learning_rate": 0.00019772727272727273, + "loss": 2.1897, "step": 16 }, { - "epoch": 0.34, - "learning_rate": 0.00019783163265306124, - "loss": 2.0041, + "epoch": 0.19, + "learning_rate": 0.0001975852272727273, + "loss": 2.2214, "step": 17 }, { - "epoch": 0.36, - "learning_rate": 0.00019770408163265305, - "loss": 1.9385, + "epoch": 0.2, + "learning_rate": 0.00019744318181818182, + "loss": 2.2103, "step": 18 }, { - "epoch": 0.38, - "learning_rate": 0.0001975765306122449, - "loss": 1.9592, + "epoch": 0.21, + "learning_rate": 0.00019730113636363635, + "loss": 2.1747, "step": 19 }, { - "epoch": 0.4, - "learning_rate": 0.00019744897959183674, - "loss": 1.9701, + "epoch": 0.23, + "learning_rate": 0.00019715909090909094, + "loss": 2.2067, "step": 20 }, { - "epoch": 0.42, - "learning_rate": 0.0001973214285714286, - "loss": 1.9277, + "epoch": 0.24, + "learning_rate": 0.00019701704545454547, + "loss": 2.1944, "step": 21 }, { - "epoch": 0.45, - "learning_rate": 0.00019719387755102042, - "loss": 1.8394, + "epoch": 0.25, + "learning_rate": 0.000196875, + "loss": 2.2088, "step": 22 }, { - "epoch": 0.47, - "learning_rate": 0.00019706632653061226, - "loss": 1.8666, + "epoch": 0.26, + "learning_rate": 0.00019673295454545456, + "loss": 2.1786, "step": 23 }, { - "epoch": 0.49, - "learning_rate": 0.00019693877551020409, - "loss": 1.8997, + "epoch": 0.27, + "learning_rate": 0.0001965909090909091, + "loss": 2.1242, "step": 24 }, { - "epoch": 0.51, - "learning_rate": 0.00019681122448979592, - "loss": 1.9432, + "epoch": 0.28, + "learning_rate": 0.00019644886363636365, + "loss": 2.1233, "step": 25 }, { - "epoch": 0.53, - "learning_rate": 0.00019668367346938777, - "loss": 1.9137, + "epoch": 0.29, + "learning_rate": 0.0001963068181818182, + "loss": 2.1616, "step": 26 }, { - "epoch": 0.55, - "learning_rate": 0.0001965561224489796, - "loss": 1.905, + "epoch": 0.31, + "learning_rate": 0.00019616477272727274, + "loss": 2.1175, "step": 27 }, { - "epoch": 0.57, - "learning_rate": 0.00019642857142857144, - "loss": 1.8708, + "epoch": 0.32, + "learning_rate": 0.00019602272727272727, + "loss": 2.1242, "step": 28 }, { - "epoch": 0.59, - "learning_rate": 0.00019630102040816327, - "loss": 1.9097, + "epoch": 0.33, + "learning_rate": 0.00019588068181818183, + "loss": 2.186, "step": 29 }, { - "epoch": 0.61, - "learning_rate": 0.00019617346938775513, - "loss": 1.896, + "epoch": 0.34, + "learning_rate": 0.00019573863636363636, + "loss": 2.1319, "step": 30 }, { - "epoch": 0.63, - "learning_rate": 0.00019604591836734696, - "loss": 1.8834, + "epoch": 0.35, + "learning_rate": 0.00019559659090909092, + "loss": 2.1219, "step": 31 }, { - "epoch": 0.65, - "learning_rate": 0.0001959183673469388, - "loss": 1.8323, + "epoch": 0.36, + "learning_rate": 0.00019545454545454548, + "loss": 2.1094, "step": 32 }, { - "epoch": 0.67, - "learning_rate": 0.00019579081632653062, - "loss": 1.804, + "epoch": 0.37, + "learning_rate": 0.0001953125, + "loss": 2.1355, "step": 33 }, { - "epoch": 0.69, - "learning_rate": 0.00019566326530612248, - "loss": 1.8906, + "epoch": 0.38, + "learning_rate": 0.00019517045454545454, + "loss": 2.1231, "step": 34 }, { - "epoch": 0.71, - "learning_rate": 0.00019553571428571428, - "loss": 1.8693, + "epoch": 0.4, + "learning_rate": 0.0001950284090909091, + "loss": 2.1089, "step": 35 }, { - "epoch": 0.73, - "learning_rate": 0.00019540816326530614, - "loss": 1.9308, + "epoch": 0.41, + "learning_rate": 0.00019488636363636366, + "loss": 2.1329, "step": 36 }, { - "epoch": 0.75, - "learning_rate": 0.00019528061224489797, - "loss": 1.8082, + "epoch": 0.42, + "learning_rate": 0.0001947443181818182, + "loss": 2.1159, "step": 37 }, { - "epoch": 0.77, - "learning_rate": 0.0001951530612244898, - "loss": 1.848, + "epoch": 0.43, + "learning_rate": 0.00019460227272727275, + "loss": 2.1001, "step": 38 }, { - "epoch": 0.79, - "learning_rate": 0.00019502551020408163, - "loss": 1.8866, + "epoch": 0.44, + "learning_rate": 0.00019446022727272728, + "loss": 2.1084, "step": 39 }, { - "epoch": 0.81, - "learning_rate": 0.0001948979591836735, - "loss": 1.7844, + "epoch": 0.45, + "learning_rate": 0.0001943181818181818, + "loss": 2.1431, "step": 40 }, { - "epoch": 0.83, - "learning_rate": 0.0001947704081632653, - "loss": 1.8485, + "epoch": 0.46, + "learning_rate": 0.00019417613636363637, + "loss": 2.1111, "step": 41 }, { - "epoch": 0.85, - "learning_rate": 0.00019464285714285715, - "loss": 1.7917, + "epoch": 0.47, + "learning_rate": 0.00019403409090909093, + "loss": 2.1067, "step": 42 }, { - "epoch": 0.87, - "learning_rate": 0.00019451530612244898, - "loss": 1.7342, + "epoch": 0.49, + "learning_rate": 0.00019389204545454546, + "loss": 2.0974, "step": 43 }, { - "epoch": 0.89, - "learning_rate": 0.00019438775510204084, - "loss": 1.8479, + "epoch": 0.5, + "learning_rate": 0.00019375000000000002, + "loss": 2.1001, "step": 44 }, { - "epoch": 0.91, - "learning_rate": 0.00019426020408163267, - "loss": 1.8639, + "epoch": 0.51, + "learning_rate": 0.00019360795454545455, + "loss": 2.0721, "step": 45 }, { - "epoch": 0.93, - "learning_rate": 0.0001941326530612245, - "loss": 1.8166, + "epoch": 0.52, + "learning_rate": 0.00019346590909090908, + "loss": 2.0786, "step": 46 }, { - "epoch": 0.95, - "learning_rate": 0.00019400510204081633, - "loss": 1.7566, + "epoch": 0.53, + "learning_rate": 0.00019332386363636367, + "loss": 2.0882, "step": 47 }, { - "epoch": 0.97, - "learning_rate": 0.00019387755102040816, - "loss": 1.8071, + "epoch": 0.54, + "learning_rate": 0.0001931818181818182, + "loss": 2.083, "step": 48 }, { - "epoch": 0.99, - "learning_rate": 0.00019375000000000002, - "loss": 1.8612, + "epoch": 0.55, + "learning_rate": 0.00019303977272727273, + "loss": 2.1016, "step": 49 }, { - "epoch": 1.01, - "learning_rate": 0.00019362244897959185, - "loss": 1.7819, + "epoch": 0.56, + "learning_rate": 0.0001928977272727273, + "loss": 2.0844, "step": 50 }, { - "epoch": 1.03, - "learning_rate": 0.00019349489795918368, - "loss": 1.8647, + "epoch": 0.58, + "learning_rate": 0.00019275568181818182, + "loss": 2.0891, "step": 51 }, { - "epoch": 1.05, - "learning_rate": 0.0001933673469387755, - "loss": 1.8196, + "epoch": 0.59, + "learning_rate": 0.00019261363636363635, + "loss": 2.053, "step": 52 }, { - "epoch": 1.07, - "learning_rate": 0.00019323979591836737, - "loss": 1.8027, + "epoch": 0.6, + "learning_rate": 0.00019247159090909094, + "loss": 2.1013, "step": 53 }, { - "epoch": 1.09, - "learning_rate": 0.00019311224489795917, - "loss": 1.8927, + "epoch": 0.61, + "learning_rate": 0.00019232954545454547, + "loss": 2.127, "step": 54 }, { - "epoch": 1.11, - "learning_rate": 0.00019298469387755103, - "loss": 1.8481, + "epoch": 0.62, + "learning_rate": 0.0001921875, + "loss": 2.0909, "step": 55 }, { - "epoch": 1.13, - "learning_rate": 0.00019285714285714286, - "loss": 1.7781, + "epoch": 0.63, + "learning_rate": 0.00019204545454545456, + "loss": 2.1026, "step": 56 }, { - "epoch": 1.15, - "learning_rate": 0.00019272959183673472, - "loss": 1.8101, + "epoch": 0.64, + "learning_rate": 0.0001919034090909091, + "loss": 2.0689, "step": 57 }, { - "epoch": 1.17, - "learning_rate": 0.00019260204081632653, - "loss": 1.7257, + "epoch": 0.66, + "learning_rate": 0.00019176136363636365, + "loss": 2.0475, "step": 58 }, { - "epoch": 1.19, - "learning_rate": 0.00019247448979591838, - "loss": 1.8185, + "epoch": 0.67, + "learning_rate": 0.0001916193181818182, + "loss": 2.0645, "step": 59 }, { - "epoch": 1.21, - "learning_rate": 0.00019234693877551021, - "loss": 1.8557, + "epoch": 0.68, + "learning_rate": 0.00019147727272727274, + "loss": 2.0469, "step": 60 }, { - "epoch": 1.23, - "learning_rate": 0.00019221938775510204, - "loss": 1.7418, + "epoch": 0.69, + "learning_rate": 0.00019133522727272727, + "loss": 2.081, "step": 61 }, { - "epoch": 1.25, - "learning_rate": 0.00019209183673469388, - "loss": 1.6879, + "epoch": 0.7, + "learning_rate": 0.00019119318181818183, + "loss": 2.0682, "step": 62 }, { - "epoch": 1.27, - "learning_rate": 0.00019196428571428573, - "loss": 1.7651, + "epoch": 0.71, + "learning_rate": 0.00019105113636363636, + "loss": 2.0794, "step": 63 }, { - "epoch": 1.29, - "learning_rate": 0.00019183673469387756, - "loss": 1.7759, + "epoch": 0.72, + "learning_rate": 0.00019090909090909092, + "loss": 2.0218, "step": 64 }, { - "epoch": 1.32, - "learning_rate": 0.0001917091836734694, - "loss": 1.7691, + "epoch": 0.73, + "learning_rate": 0.00019076704545454548, + "loss": 2.0791, "step": 65 }, { - "epoch": 1.34, - "learning_rate": 0.00019158163265306123, - "loss": 1.7794, + "epoch": 0.75, + "learning_rate": 0.000190625, + "loss": 2.0506, "step": 66 }, { - "epoch": 1.36, - "learning_rate": 0.00019145408163265306, - "loss": 1.8152, + "epoch": 0.76, + "learning_rate": 0.00019048295454545454, + "loss": 2.0581, "step": 67 }, { - "epoch": 1.38, - "learning_rate": 0.00019132653061224492, - "loss": 1.8052, + "epoch": 0.77, + "learning_rate": 0.0001903409090909091, + "loss": 2.0614, "step": 68 }, { - "epoch": 1.4, - "learning_rate": 0.00019119897959183675, - "loss": 1.8054, + "epoch": 0.78, + "learning_rate": 0.00019019886363636366, + "loss": 2.0743, "step": 69 }, { - "epoch": 1.42, - "learning_rate": 0.00019107142857142858, - "loss": 1.8114, + "epoch": 0.79, + "learning_rate": 0.0001900568181818182, + "loss": 2.0934, "step": 70 }, { - "epoch": 1.44, - "learning_rate": 0.0001909438775510204, - "loss": 1.7749, + "epoch": 0.8, + "learning_rate": 0.00018991477272727275, + "loss": 2.0695, "step": 71 }, { - "epoch": 1.46, - "learning_rate": 0.00019081632653061227, - "loss": 1.777, + "epoch": 0.81, + "learning_rate": 0.00018977272727272728, + "loss": 2.0651, "step": 72 }, { - "epoch": 1.48, - "learning_rate": 0.0001906887755102041, - "loss": 1.7896, + "epoch": 0.82, + "learning_rate": 0.00018963068181818181, + "loss": 2.1002, "step": 73 }, { - "epoch": 1.5, - "learning_rate": 0.00019056122448979593, - "loss": 1.8335, + "epoch": 0.84, + "learning_rate": 0.00018948863636363637, + "loss": 2.0691, "step": 74 }, { - "epoch": 1.52, - "learning_rate": 0.00019043367346938776, - "loss": 1.8155, + "epoch": 0.85, + "learning_rate": 0.00018934659090909093, + "loss": 2.0596, "step": 75 }, { - "epoch": 1.54, - "learning_rate": 0.00019030612244897962, - "loss": 1.8224, + "epoch": 0.86, + "learning_rate": 0.00018920454545454546, + "loss": 2.0542, "step": 76 }, { - "epoch": 1.56, - "learning_rate": 0.00019017857142857142, - "loss": 1.7889, + "epoch": 0.87, + "learning_rate": 0.00018906250000000002, + "loss": 2.0543, "step": 77 }, { - "epoch": 1.58, - "learning_rate": 0.00019005102040816328, - "loss": 1.8866, + "epoch": 0.88, + "learning_rate": 0.00018892045454545455, + "loss": 2.0042, "step": 78 }, { - "epoch": 1.6, - "learning_rate": 0.0001899234693877551, - "loss": 1.8439, + "epoch": 0.89, + "learning_rate": 0.00018877840909090908, + "loss": 2.0072, "step": 79 }, { - "epoch": 1.62, - "learning_rate": 0.00018979591836734697, - "loss": 1.7906, + "epoch": 0.9, + "learning_rate": 0.00018863636363636364, + "loss": 2.0926, "step": 80 }, { - "epoch": 1.64, - "learning_rate": 0.00018966836734693877, - "loss": 1.8627, + "epoch": 0.92, + "learning_rate": 0.0001884943181818182, + "loss": 2.0015, "step": 81 }, { - "epoch": 1.66, - "learning_rate": 0.00018954081632653063, - "loss": 1.7497, + "epoch": 0.93, + "learning_rate": 0.00018835227272727273, + "loss": 2.0591, "step": 82 }, { - "epoch": 1.68, - "learning_rate": 0.00018941326530612246, - "loss": 1.7936, + "epoch": 0.94, + "learning_rate": 0.0001882102272727273, + "loss": 2.0522, "step": 83 }, { - "epoch": 1.7, - "learning_rate": 0.0001892857142857143, - "loss": 1.8341, + "epoch": 0.95, + "learning_rate": 0.00018806818181818182, + "loss": 2.0131, "step": 84 }, { - "epoch": 1.72, - "learning_rate": 0.00018915816326530612, - "loss": 1.7868, + "epoch": 0.96, + "learning_rate": 0.00018792613636363636, + "loss": 2.0572, "step": 85 }, { - "epoch": 1.74, - "learning_rate": 0.00018903061224489798, - "loss": 1.7493, + "epoch": 0.97, + "learning_rate": 0.00018778409090909091, + "loss": 2.0352, "step": 86 }, { - "epoch": 1.76, - "learning_rate": 0.0001889030612244898, - "loss": 1.7926, + "epoch": 0.98, + "learning_rate": 0.00018764204545454547, + "loss": 1.9937, "step": 87 }, { - "epoch": 1.78, - "learning_rate": 0.00018877551020408164, - "loss": 1.8278, + "epoch": 0.99, + "learning_rate": 0.0001875, + "loss": 2.0534, "step": 88 }, { - "epoch": 1.8, - "learning_rate": 0.00018864795918367347, - "loss": 1.7387, + "epoch": 1.01, + "learning_rate": 0.00018735795454545456, + "loss": 2.0151, "step": 89 }, { - "epoch": 1.82, - "learning_rate": 0.0001885204081632653, - "loss": 1.7669, + "epoch": 1.02, + "learning_rate": 0.0001872159090909091, + "loss": 2.0281, "step": 90 }, { - "epoch": 1.84, - "learning_rate": 0.00018839285714285716, - "loss": 1.7686, + "epoch": 1.03, + "learning_rate": 0.00018707386363636365, + "loss": 2.0582, "step": 91 }, { - "epoch": 1.86, - "learning_rate": 0.000188265306122449, - "loss": 1.7759, + "epoch": 1.04, + "learning_rate": 0.00018693181818181818, + "loss": 2.0173, "step": 92 }, { - "epoch": 1.88, - "learning_rate": 0.00018813775510204082, - "loss": 1.7016, + "epoch": 1.05, + "learning_rate": 0.00018678977272727274, + "loss": 2.0318, "step": 93 }, { - "epoch": 1.9, - "learning_rate": 0.00018801020408163265, - "loss": 1.8123, + "epoch": 1.06, + "learning_rate": 0.00018664772727272727, + "loss": 2.0747, "step": 94 }, { - "epoch": 1.92, - "learning_rate": 0.0001878826530612245, - "loss": 1.8315, + "epoch": 1.07, + "learning_rate": 0.00018650568181818183, + "loss": 2.0036, "step": 95 }, { - "epoch": 1.94, - "learning_rate": 0.00018775510204081634, - "loss": 1.7679, + "epoch": 1.08, + "learning_rate": 0.00018636363636363636, + "loss": 2.0215, "step": 96 }, { - "epoch": 1.96, - "learning_rate": 0.00018762755102040817, - "loss": 1.7874, + "epoch": 1.1, + "learning_rate": 0.00018622159090909092, + "loss": 2.0385, "step": 97 }, { - "epoch": 1.98, - "learning_rate": 0.0001875, - "loss": 1.8008, + "epoch": 1.11, + "learning_rate": 0.00018607954545454545, + "loss": 2.0247, "step": 98 }, { - "epoch": 2.0, - "learning_rate": 0.00018737244897959186, - "loss": 1.7177, + "epoch": 1.12, + "learning_rate": 0.0001859375, + "loss": 2.0075, "step": 99 }, { - "epoch": 2.02, - "learning_rate": 0.00018724489795918367, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.00018579545454545454, + "loss": 2.0134, "step": 100 }, { - "epoch": 2.04, - "learning_rate": 0.00018711734693877552, - "loss": 1.7848, + "epoch": 1.14, + "learning_rate": 0.0001856534090909091, + "loss": 1.9908, "step": 101 }, { - "epoch": 2.06, - "learning_rate": 0.00018698979591836735, - "loss": 1.744, + "epoch": 1.15, + "learning_rate": 0.00018551136363636366, + "loss": 2.0048, "step": 102 }, { - "epoch": 2.08, - "learning_rate": 0.00018686224489795919, - "loss": 1.7005, + "epoch": 1.16, + "learning_rate": 0.0001853693181818182, + "loss": 1.9929, "step": 103 }, { - "epoch": 2.1, - "learning_rate": 0.00018673469387755102, - "loss": 1.8247, + "epoch": 1.17, + "learning_rate": 0.00018522727272727273, + "loss": 2.0545, "step": 104 }, { - "epoch": 2.12, - "learning_rate": 0.00018660714285714287, - "loss": 1.6855, + "epoch": 1.19, + "learning_rate": 0.00018508522727272728, + "loss": 2.0212, "step": 105 }, { - "epoch": 2.14, - "learning_rate": 0.0001864795918367347, - "loss": 1.7627, + "epoch": 1.2, + "learning_rate": 0.00018494318181818182, + "loss": 2.0154, "step": 106 }, { - "epoch": 2.17, - "learning_rate": 0.00018635204081632654, - "loss": 1.7564, + "epoch": 1.21, + "learning_rate": 0.00018480113636363637, + "loss": 1.988, "step": 107 }, { - "epoch": 2.19, - "learning_rate": 0.00018622448979591837, - "loss": 1.8237, + "epoch": 1.22, + "learning_rate": 0.00018465909090909093, + "loss": 2.004, "step": 108 }, { - "epoch": 2.21, - "learning_rate": 0.00018609693877551022, - "loss": 1.7421, + "epoch": 1.23, + "learning_rate": 0.00018451704545454546, + "loss": 1.9902, "step": 109 }, { - "epoch": 2.23, - "learning_rate": 0.00018596938775510206, - "loss": 1.7517, + "epoch": 1.24, + "learning_rate": 0.000184375, + "loss": 2.0044, "step": 110 }, { - "epoch": 2.25, - "learning_rate": 0.0001858418367346939, - "loss": 1.7515, + "epoch": 1.25, + "learning_rate": 0.00018423295454545455, + "loss": 2.028, "step": 111 }, { - "epoch": 2.27, - "learning_rate": 0.00018571428571428572, - "loss": 1.7842, + "epoch": 1.27, + "learning_rate": 0.00018409090909090909, + "loss": 1.975, "step": 112 }, { - "epoch": 2.29, - "learning_rate": 0.00018558673469387755, - "loss": 1.8001, + "epoch": 1.28, + "learning_rate": 0.00018394886363636364, + "loss": 1.9654, "step": 113 }, { - "epoch": 2.31, - "learning_rate": 0.0001854591836734694, - "loss": 1.7653, + "epoch": 1.29, + "learning_rate": 0.0001838068181818182, + "loss": 2.013, "step": 114 }, { - "epoch": 2.33, - "learning_rate": 0.00018533163265306124, - "loss": 1.694, + "epoch": 1.3, + "learning_rate": 0.00018366477272727273, + "loss": 1.9918, "step": 115 }, { - "epoch": 2.35, - "learning_rate": 0.00018520408163265307, - "loss": 1.7457, + "epoch": 1.31, + "learning_rate": 0.00018352272727272727, + "loss": 2.0028, "step": 116 }, { - "epoch": 2.37, - "learning_rate": 0.0001850765306122449, - "loss": 1.7899, + "epoch": 1.32, + "learning_rate": 0.00018338068181818182, + "loss": 1.9906, "step": 117 }, { - "epoch": 2.39, - "learning_rate": 0.00018494897959183676, - "loss": 1.7473, + "epoch": 1.33, + "learning_rate": 0.00018323863636363636, + "loss": 1.9781, "step": 118 }, { - "epoch": 2.41, - "learning_rate": 0.0001848214285714286, - "loss": 1.6639, + "epoch": 1.34, + "learning_rate": 0.00018309659090909091, + "loss": 1.994, "step": 119 }, { - "epoch": 2.43, - "learning_rate": 0.00018469387755102042, - "loss": 1.762, + "epoch": 1.36, + "learning_rate": 0.00018295454545454547, + "loss": 1.9732, "step": 120 }, { - "epoch": 2.45, - "learning_rate": 0.00018456632653061225, - "loss": 1.7378, + "epoch": 1.37, + "learning_rate": 0.0001828125, + "loss": 1.9985, "step": 121 }, { - "epoch": 2.47, - "learning_rate": 0.0001844387755102041, - "loss": 1.672, + "epoch": 1.38, + "learning_rate": 0.00018267045454545454, + "loss": 2.032, "step": 122 }, { - "epoch": 2.49, - "learning_rate": 0.0001843112244897959, - "loss": 1.7267, + "epoch": 1.39, + "learning_rate": 0.0001825284090909091, + "loss": 1.9743, "step": 123 }, { - "epoch": 2.51, - "learning_rate": 0.00018418367346938777, - "loss": 1.7825, + "epoch": 1.4, + "learning_rate": 0.00018238636363636365, + "loss": 1.9857, "step": 124 }, { - "epoch": 2.53, - "learning_rate": 0.0001840561224489796, - "loss": 1.7566, + "epoch": 1.41, + "learning_rate": 0.00018224431818181819, + "loss": 2.0118, "step": 125 }, { - "epoch": 2.55, - "learning_rate": 0.00018392857142857143, - "loss": 1.8169, + "epoch": 1.42, + "learning_rate": 0.00018210227272727274, + "loss": 2.0151, "step": 126 }, { - "epoch": 2.57, - "learning_rate": 0.00018380102040816326, - "loss": 1.6801, + "epoch": 1.43, + "learning_rate": 0.00018196022727272728, + "loss": 1.9863, "step": 127 }, { - "epoch": 2.59, - "learning_rate": 0.00018367346938775512, - "loss": 1.7292, + "epoch": 1.45, + "learning_rate": 0.00018181818181818183, + "loss": 1.9959, "step": 128 }, { - "epoch": 2.61, - "learning_rate": 0.00018354591836734695, - "loss": 1.737, + "epoch": 1.46, + "learning_rate": 0.00018167613636363637, + "loss": 1.9642, "step": 129 }, { - "epoch": 2.63, - "learning_rate": 0.00018341836734693878, - "loss": 1.7696, + "epoch": 1.47, + "learning_rate": 0.00018153409090909092, + "loss": 1.953, "step": 130 }, { - "epoch": 2.65, - "learning_rate": 0.0001832908163265306, - "loss": 1.7239, + "epoch": 1.48, + "learning_rate": 0.00018139204545454546, + "loss": 1.9994, "step": 131 }, { - "epoch": 2.67, - "learning_rate": 0.00018316326530612247, - "loss": 1.7441, + "epoch": 1.49, + "learning_rate": 0.00018125000000000001, + "loss": 1.9557, "step": 132 }, { - "epoch": 2.69, - "learning_rate": 0.0001830357142857143, - "loss": 1.7825, + "epoch": 1.5, + "learning_rate": 0.00018110795454545455, + "loss": 2.0051, "step": 133 }, { - "epoch": 2.71, - "learning_rate": 0.00018290816326530613, - "loss": 1.7411, + "epoch": 1.51, + "learning_rate": 0.0001809659090909091, + "loss": 1.9799, "step": 134 }, { - "epoch": 2.73, - "learning_rate": 0.00018278061224489796, - "loss": 1.7119, + "epoch": 1.53, + "learning_rate": 0.00018082386363636366, + "loss": 1.9696, "step": 135 }, { - "epoch": 2.75, - "learning_rate": 0.0001826530612244898, - "loss": 1.7443, + "epoch": 1.54, + "learning_rate": 0.0001806818181818182, + "loss": 1.9664, "step": 136 }, { - "epoch": 2.77, - "learning_rate": 0.00018252551020408165, - "loss": 1.7197, + "epoch": 1.55, + "learning_rate": 0.00018053977272727273, + "loss": 1.9619, "step": 137 }, { - "epoch": 2.79, - "learning_rate": 0.00018239795918367348, - "loss": 1.7273, + "epoch": 1.56, + "learning_rate": 0.00018039772727272729, + "loss": 1.9833, "step": 138 }, { - "epoch": 2.81, - "learning_rate": 0.0001822704081632653, - "loss": 1.7681, + "epoch": 1.57, + "learning_rate": 0.00018025568181818182, + "loss": 1.9791, "step": 139 }, { - "epoch": 2.83, - "learning_rate": 0.00018214285714285714, - "loss": 1.8088, + "epoch": 1.58, + "learning_rate": 0.00018011363636363638, + "loss": 1.9777, "step": 140 }, { - "epoch": 2.85, - "learning_rate": 0.000182015306122449, - "loss": 1.7301, + "epoch": 1.59, + "learning_rate": 0.00017997159090909093, + "loss": 1.9361, "step": 141 }, { - "epoch": 2.87, - "learning_rate": 0.00018188775510204083, - "loss": 1.6853, + "epoch": 1.6, + "learning_rate": 0.00017982954545454547, + "loss": 1.9449, "step": 142 }, { - "epoch": 2.89, - "learning_rate": 0.00018176020408163266, - "loss": 1.6966, + "epoch": 1.62, + "learning_rate": 0.0001796875, + "loss": 1.9541, "step": 143 }, { - "epoch": 2.91, - "learning_rate": 0.0001816326530612245, - "loss": 1.7938, + "epoch": 1.63, + "learning_rate": 0.00017954545454545456, + "loss": 1.9867, "step": 144 }, { - "epoch": 2.93, - "learning_rate": 0.00018150510204081635, - "loss": 1.7639, + "epoch": 1.64, + "learning_rate": 0.0001794034090909091, + "loss": 1.9433, "step": 145 }, { - "epoch": 2.95, - "learning_rate": 0.00018137755102040816, - "loss": 1.7527, + "epoch": 1.65, + "learning_rate": 0.00017926136363636365, + "loss": 1.9789, "step": 146 }, { - "epoch": 2.97, - "learning_rate": 0.00018125000000000001, - "loss": 1.7386, + "epoch": 1.66, + "learning_rate": 0.0001791193181818182, + "loss": 1.9942, "step": 147 }, { - "epoch": 2.99, - "learning_rate": 0.00018112244897959185, - "loss": 1.7223, + "epoch": 1.67, + "learning_rate": 0.00017897727272727274, + "loss": 1.9724, "step": 148 }, { - "epoch": 3.01, - "learning_rate": 0.00018099489795918368, - "loss": 1.7571, + "epoch": 1.68, + "learning_rate": 0.00017883522727272727, + "loss": 1.9938, "step": 149 }, { - "epoch": 3.04, - "learning_rate": 0.0001808673469387755, - "loss": 1.7054, + "epoch": 1.69, + "learning_rate": 0.00017869318181818183, + "loss": 1.9264, "step": 150 }, { - "epoch": 3.06, - "learning_rate": 0.00018073979591836737, - "loss": 1.6581, + "epoch": 1.71, + "learning_rate": 0.00017855113636363636, + "loss": 1.9372, "step": 151 }, { - "epoch": 3.08, - "learning_rate": 0.00018061224489795917, - "loss": 1.681, + "epoch": 1.72, + "learning_rate": 0.00017840909090909092, + "loss": 1.9463, "step": 152 }, { - "epoch": 3.1, - "learning_rate": 0.00018048469387755103, - "loss": 1.7425, + "epoch": 1.73, + "learning_rate": 0.00017826704545454547, + "loss": 1.9244, "step": 153 }, { - "epoch": 3.12, - "learning_rate": 0.00018035714285714286, - "loss": 1.7108, + "epoch": 1.74, + "learning_rate": 0.000178125, + "loss": 1.9139, "step": 154 }, { - "epoch": 3.14, - "learning_rate": 0.00018022959183673472, - "loss": 1.7194, + "epoch": 1.75, + "learning_rate": 0.00017798295454545454, + "loss": 1.9612, "step": 155 }, { - "epoch": 3.16, - "learning_rate": 0.00018010204081632655, - "loss": 1.6953, + "epoch": 1.76, + "learning_rate": 0.0001778409090909091, + "loss": 1.9399, "step": 156 }, { - "epoch": 3.18, - "learning_rate": 0.00017997448979591838, - "loss": 1.669, + "epoch": 1.77, + "learning_rate": 0.00017769886363636366, + "loss": 1.906, "step": 157 }, { - "epoch": 3.2, - "learning_rate": 0.0001798469387755102, - "loss": 1.744, + "epoch": 1.78, + "learning_rate": 0.0001775568181818182, + "loss": 1.9294, "step": 158 }, { - "epoch": 3.22, - "learning_rate": 0.00017971938775510204, - "loss": 1.6467, + "epoch": 1.8, + "learning_rate": 0.00017741477272727275, + "loss": 1.9663, "step": 159 }, { - "epoch": 3.24, - "learning_rate": 0.0001795918367346939, - "loss": 1.7103, + "epoch": 1.81, + "learning_rate": 0.00017727272727272728, + "loss": 1.9257, "step": 160 }, { - "epoch": 3.26, - "learning_rate": 0.00017946428571428573, - "loss": 1.6662, + "epoch": 1.82, + "learning_rate": 0.0001771306818181818, + "loss": 1.9416, "step": 161 }, { - "epoch": 3.28, - "learning_rate": 0.00017933673469387756, - "loss": 1.6657, + "epoch": 1.83, + "learning_rate": 0.00017698863636363637, + "loss": 1.94, "step": 162 }, { - "epoch": 3.3, - "learning_rate": 0.0001792091836734694, - "loss": 1.791, + "epoch": 1.84, + "learning_rate": 0.00017684659090909093, + "loss": 1.9064, "step": 163 }, { - "epoch": 3.32, - "learning_rate": 0.00017908163265306125, - "loss": 1.7704, + "epoch": 1.85, + "learning_rate": 0.00017670454545454546, + "loss": 1.9363, "step": 164 }, { - "epoch": 3.34, - "learning_rate": 0.00017895408163265305, - "loss": 1.7229, + "epoch": 1.86, + "learning_rate": 0.00017656250000000002, + "loss": 1.9414, "step": 165 }, { - "epoch": 3.36, - "learning_rate": 0.0001788265306122449, - "loss": 1.76, + "epoch": 1.88, + "learning_rate": 0.00017642045454545455, + "loss": 1.9526, "step": 166 }, { - "epoch": 3.38, - "learning_rate": 0.00017869897959183674, - "loss": 1.6482, + "epoch": 1.89, + "learning_rate": 0.00017627840909090908, + "loss": 1.9263, "step": 167 }, { - "epoch": 3.4, - "learning_rate": 0.0001785714285714286, - "loss": 1.8076, + "epoch": 1.9, + "learning_rate": 0.00017613636363636366, + "loss": 1.9251, "step": 168 }, { - "epoch": 3.42, - "learning_rate": 0.0001784438775510204, - "loss": 1.7368, + "epoch": 1.91, + "learning_rate": 0.0001759943181818182, + "loss": 1.9085, "step": 169 }, { - "epoch": 3.44, - "learning_rate": 0.00017831632653061226, - "loss": 1.6264, + "epoch": 1.92, + "learning_rate": 0.00017585227272727273, + "loss": 1.9287, "step": 170 }, { - "epoch": 3.46, - "learning_rate": 0.0001781887755102041, - "loss": 1.6289, + "epoch": 1.93, + "learning_rate": 0.00017571022727272729, + "loss": 1.9246, "step": 171 }, { - "epoch": 3.48, - "learning_rate": 0.00017806122448979592, - "loss": 1.7913, + "epoch": 1.94, + "learning_rate": 0.00017556818181818182, + "loss": 1.916, "step": 172 }, { - "epoch": 3.5, - "learning_rate": 0.00017793367346938775, - "loss": 1.6985, + "epoch": 1.95, + "learning_rate": 0.00017542613636363635, + "loss": 1.9297, "step": 173 }, { - "epoch": 3.52, - "learning_rate": 0.0001778061224489796, - "loss": 1.6936, + "epoch": 1.97, + "learning_rate": 0.00017528409090909094, + "loss": 1.8881, "step": 174 }, { - "epoch": 3.54, - "learning_rate": 0.00017767857142857141, - "loss": 1.8068, + "epoch": 1.98, + "learning_rate": 0.00017514204545454547, + "loss": 1.9208, "step": 175 }, { - "epoch": 3.56, - "learning_rate": 0.00017755102040816327, - "loss": 1.7243, + "epoch": 1.99, + "learning_rate": 0.000175, + "loss": 1.9233, "step": 176 }, { - "epoch": 3.58, - "learning_rate": 0.0001774234693877551, - "loss": 1.6893, + "epoch": 2.0, + "learning_rate": 0.00017485795454545456, + "loss": 1.9309, "step": 177 }, { - "epoch": 3.6, - "learning_rate": 0.00017729591836734696, - "loss": 1.8122, + "epoch": 2.01, + "learning_rate": 0.0001747159090909091, + "loss": 1.877, "step": 178 }, { - "epoch": 3.62, - "learning_rate": 0.0001771683673469388, - "loss": 1.6562, + "epoch": 2.02, + "learning_rate": 0.00017457386363636365, + "loss": 1.9083, "step": 179 }, { - "epoch": 3.64, - "learning_rate": 0.00017704081632653062, - "loss": 1.6999, + "epoch": 2.03, + "learning_rate": 0.0001744318181818182, + "loss": 1.8733, "step": 180 }, { - "epoch": 3.66, - "learning_rate": 0.00017691326530612245, - "loss": 1.7229, + "epoch": 2.04, + "learning_rate": 0.00017428977272727274, + "loss": 1.8905, "step": 181 }, { - "epoch": 3.68, - "learning_rate": 0.00017678571428571428, - "loss": 1.6764, + "epoch": 2.06, + "learning_rate": 0.00017414772727272727, + "loss": 1.9175, "step": 182 }, { - "epoch": 3.7, - "learning_rate": 0.00017665816326530614, - "loss": 1.6982, + "epoch": 2.07, + "learning_rate": 0.00017400568181818183, + "loss": 1.8846, "step": 183 }, { - "epoch": 3.72, - "learning_rate": 0.00017653061224489797, - "loss": 1.696, + "epoch": 2.08, + "learning_rate": 0.00017386363636363636, + "loss": 1.8847, "step": 184 }, { - "epoch": 3.74, - "learning_rate": 0.0001764030612244898, - "loss": 1.6797, + "epoch": 2.09, + "learning_rate": 0.00017372159090909092, + "loss": 1.8948, "step": 185 }, { - "epoch": 3.76, - "learning_rate": 0.00017627551020408164, - "loss": 1.637, + "epoch": 2.1, + "learning_rate": 0.00017357954545454548, + "loss": 1.8728, "step": 186 }, { - "epoch": 3.78, - "learning_rate": 0.0001761479591836735, - "loss": 1.7074, + "epoch": 2.11, + "learning_rate": 0.0001734375, + "loss": 1.8934, "step": 187 }, { - "epoch": 3.8, - "learning_rate": 0.0001760204081632653, - "loss": 1.705, + "epoch": 2.12, + "learning_rate": 0.00017329545454545454, + "loss": 1.8796, "step": 188 }, { - "epoch": 3.82, - "learning_rate": 0.00017589285714285716, - "loss": 1.6153, + "epoch": 2.14, + "learning_rate": 0.0001731534090909091, + "loss": 1.902, "step": 189 }, { - "epoch": 3.84, - "learning_rate": 0.00017576530612244899, - "loss": 1.7354, + "epoch": 2.15, + "learning_rate": 0.00017301136363636366, + "loss": 1.8864, "step": 190 }, { - "epoch": 3.86, - "learning_rate": 0.00017563775510204084, - "loss": 1.6941, + "epoch": 2.16, + "learning_rate": 0.0001728693181818182, + "loss": 1.8682, "step": 191 }, { - "epoch": 3.88, - "learning_rate": 0.00017551020408163265, - "loss": 1.7231, + "epoch": 2.17, + "learning_rate": 0.00017272727272727275, + "loss": 1.8662, "step": 192 }, { - "epoch": 3.91, - "learning_rate": 0.0001753826530612245, - "loss": 1.7663, + "epoch": 2.18, + "learning_rate": 0.00017258522727272728, + "loss": 1.8526, "step": 193 }, { - "epoch": 3.93, - "learning_rate": 0.00017525510204081634, - "loss": 1.6532, + "epoch": 2.19, + "learning_rate": 0.0001724431818181818, + "loss": 1.8682, "step": 194 }, { - "epoch": 3.95, - "learning_rate": 0.00017512755102040817, - "loss": 1.7115, + "epoch": 2.2, + "learning_rate": 0.00017230113636363637, + "loss": 1.8205, "step": 195 }, { - "epoch": 3.97, - "learning_rate": 0.000175, - "loss": 1.6955, + "epoch": 2.21, + "learning_rate": 0.00017215909090909093, + "loss": 1.8726, "step": 196 }, { - "epoch": 3.99, - "learning_rate": 0.00017487244897959186, - "loss": 1.6863, + "epoch": 2.23, + "learning_rate": 0.00017201704545454546, + "loss": 1.8241, "step": 197 }, { - "epoch": 4.01, - "learning_rate": 0.00017474489795918366, - "loss": 1.7012, + "epoch": 2.24, + "learning_rate": 0.00017187500000000002, + "loss": 1.9, "step": 198 }, { - "epoch": 4.03, - "learning_rate": 0.00017461734693877552, - "loss": 1.5927, + "epoch": 2.25, + "learning_rate": 0.00017173295454545455, + "loss": 1.8496, "step": 199 }, { - "epoch": 4.05, - "learning_rate": 0.00017448979591836735, - "loss": 1.6272, + "epoch": 2.26, + "learning_rate": 0.00017159090909090908, + "loss": 1.8562, "step": 200 }, { - "epoch": 4.07, - "learning_rate": 0.00017436224489795918, - "loss": 1.5994, + "epoch": 2.27, + "learning_rate": 0.00017144886363636367, + "loss": 1.8594, "step": 201 }, { - "epoch": 4.09, - "learning_rate": 0.00017423469387755104, - "loss": 1.7141, + "epoch": 2.28, + "learning_rate": 0.0001713068181818182, + "loss": 1.8606, "step": 202 }, { - "epoch": 4.11, - "learning_rate": 0.00017410714285714287, - "loss": 1.7547, + "epoch": 2.29, + "learning_rate": 0.00017116477272727273, + "loss": 1.8712, "step": 203 }, { - "epoch": 4.13, - "learning_rate": 0.0001739795918367347, - "loss": 1.6254, + "epoch": 2.3, + "learning_rate": 0.0001710227272727273, + "loss": 1.897, "step": 204 }, { - "epoch": 4.15, - "learning_rate": 0.00017385204081632653, - "loss": 1.6686, + "epoch": 2.32, + "learning_rate": 0.00017088068181818182, + "loss": 1.8287, "step": 205 }, { - "epoch": 4.17, - "learning_rate": 0.0001737244897959184, - "loss": 1.6684, + "epoch": 2.33, + "learning_rate": 0.00017073863636363635, + "loss": 1.8698, "step": 206 }, { - "epoch": 4.19, - "learning_rate": 0.00017359693877551022, - "loss": 1.6724, + "epoch": 2.34, + "learning_rate": 0.00017059659090909094, + "loss": 1.8611, "step": 207 }, { - "epoch": 4.21, - "learning_rate": 0.00017346938775510205, - "loss": 1.7361, + "epoch": 2.35, + "learning_rate": 0.00017045454545454547, + "loss": 1.8161, "step": 208 }, { - "epoch": 4.23, - "learning_rate": 0.00017334183673469388, - "loss": 1.7167, + "epoch": 2.36, + "learning_rate": 0.0001703125, + "loss": 1.8303, "step": 209 }, { - "epoch": 4.25, - "learning_rate": 0.00017321428571428574, - "loss": 1.7226, + "epoch": 2.37, + "learning_rate": 0.00017017045454545456, + "loss": 1.8423, "step": 210 }, { - "epoch": 4.27, - "learning_rate": 0.00017308673469387754, - "loss": 1.7133, + "epoch": 2.38, + "learning_rate": 0.0001700284090909091, + "loss": 1.861, "step": 211 }, { - "epoch": 4.29, - "learning_rate": 0.0001729591836734694, - "loss": 1.649, + "epoch": 2.4, + "learning_rate": 0.00016988636363636365, + "loss": 1.864, "step": 212 }, { - "epoch": 4.31, - "learning_rate": 0.00017283163265306123, - "loss": 1.7104, + "epoch": 2.41, + "learning_rate": 0.0001697443181818182, + "loss": 1.8448, "step": 213 }, { - "epoch": 4.33, - "learning_rate": 0.00017270408163265306, - "loss": 1.6861, + "epoch": 2.42, + "learning_rate": 0.00016960227272727274, + "loss": 1.8463, "step": 214 }, { - "epoch": 4.35, - "learning_rate": 0.0001725765306122449, - "loss": 1.648, + "epoch": 2.43, + "learning_rate": 0.00016946022727272727, + "loss": 1.8482, "step": 215 }, { - "epoch": 4.37, - "learning_rate": 0.00017244897959183675, - "loss": 1.6215, + "epoch": 2.44, + "learning_rate": 0.00016931818181818183, + "loss": 1.8289, "step": 216 }, { - "epoch": 4.39, - "learning_rate": 0.00017232142857142858, - "loss": 1.6334, + "epoch": 2.45, + "learning_rate": 0.00016917613636363636, + "loss": 1.8352, "step": 217 }, { - "epoch": 4.41, - "learning_rate": 0.0001721938775510204, - "loss": 1.6283, + "epoch": 2.46, + "learning_rate": 0.00016903409090909092, + "loss": 1.8161, "step": 218 }, { - "epoch": 4.43, - "learning_rate": 0.00017206632653061224, - "loss": 1.6462, + "epoch": 2.47, + "learning_rate": 0.00016889204545454548, + "loss": 1.8512, "step": 219 }, { - "epoch": 4.45, - "learning_rate": 0.0001719387755102041, - "loss": 1.7233, + "epoch": 2.49, + "learning_rate": 0.00016875, + "loss": 1.8211, "step": 220 }, { - "epoch": 4.47, - "learning_rate": 0.0001718112244897959, - "loss": 1.7839, + "epoch": 2.5, + "learning_rate": 0.00016860795454545454, + "loss": 1.7831, "step": 221 }, { - "epoch": 4.49, - "learning_rate": 0.00017168367346938776, - "loss": 1.7204, + "epoch": 2.51, + "learning_rate": 0.0001684659090909091, + "loss": 1.8232, "step": 222 }, { - "epoch": 4.51, - "learning_rate": 0.0001715561224489796, - "loss": 1.7671, + "epoch": 2.52, + "learning_rate": 0.00016832386363636366, + "loss": 1.8253, "step": 223 }, { - "epoch": 4.53, - "learning_rate": 0.00017142857142857143, - "loss": 1.6824, + "epoch": 2.53, + "learning_rate": 0.0001681818181818182, + "loss": 1.7994, "step": 224 }, { - "epoch": 4.55, - "learning_rate": 0.00017130102040816328, - "loss": 1.7068, + "epoch": 2.54, + "learning_rate": 0.00016803977272727275, + "loss": 1.8405, "step": 225 }, { - "epoch": 4.57, - "learning_rate": 0.00017117346938775511, - "loss": 1.6515, + "epoch": 2.55, + "learning_rate": 0.00016789772727272728, + "loss": 1.816, "step": 226 }, { - "epoch": 4.59, - "learning_rate": 0.00017104591836734694, - "loss": 1.6586, + "epoch": 2.56, + "learning_rate": 0.0001677556818181818, + "loss": 1.8343, "step": 227 }, { - "epoch": 4.61, - "learning_rate": 0.00017091836734693878, - "loss": 1.6355, + "epoch": 2.58, + "learning_rate": 0.00016761363636363637, + "loss": 1.8068, "step": 228 }, { - "epoch": 4.63, - "learning_rate": 0.00017079081632653063, - "loss": 1.7173, + "epoch": 2.59, + "learning_rate": 0.00016747159090909093, + "loss": 1.8337, "step": 229 }, { - "epoch": 4.65, - "learning_rate": 0.00017066326530612246, - "loss": 1.6585, + "epoch": 2.6, + "learning_rate": 0.00016732954545454546, + "loss": 1.8269, "step": 230 }, { - "epoch": 4.67, - "learning_rate": 0.0001705357142857143, - "loss": 1.5856, + "epoch": 2.61, + "learning_rate": 0.00016718750000000002, + "loss": 1.8243, "step": 231 }, { - "epoch": 4.69, - "learning_rate": 0.00017040816326530613, - "loss": 1.5923, + "epoch": 2.62, + "learning_rate": 0.00016704545454545455, + "loss": 1.7766, "step": 232 }, { - "epoch": 4.71, - "learning_rate": 0.00017028061224489798, - "loss": 1.7128, + "epoch": 2.63, + "learning_rate": 0.00016690340909090908, + "loss": 1.8144, "step": 233 }, { - "epoch": 4.73, - "learning_rate": 0.0001701530612244898, - "loss": 1.6971, + "epoch": 2.64, + "learning_rate": 0.00016676136363636367, + "loss": 1.8113, "step": 234 }, { - "epoch": 4.75, - "learning_rate": 0.00017002551020408165, - "loss": 1.6416, + "epoch": 2.65, + "learning_rate": 0.0001666193181818182, + "loss": 1.8086, "step": 235 }, { - "epoch": 4.78, - "learning_rate": 0.00016989795918367348, - "loss": 1.645, + "epoch": 2.67, + "learning_rate": 0.00016647727272727273, + "loss": 1.785, "step": 236 }, { - "epoch": 4.8, - "learning_rate": 0.0001697704081632653, - "loss": 1.6792, + "epoch": 2.68, + "learning_rate": 0.0001663352272727273, + "loss": 1.7884, "step": 237 }, { - "epoch": 4.82, - "learning_rate": 0.00016964285714285714, - "loss": 1.6522, + "epoch": 2.69, + "learning_rate": 0.00016619318181818182, + "loss": 1.7953, "step": 238 }, { - "epoch": 4.84, - "learning_rate": 0.000169515306122449, - "loss": 1.6315, + "epoch": 2.7, + "learning_rate": 0.00016605113636363635, + "loss": 1.8013, "step": 239 }, { - "epoch": 4.86, - "learning_rate": 0.00016938775510204083, - "loss": 1.6622, + "epoch": 2.71, + "learning_rate": 0.00016590909090909094, + "loss": 1.8074, "step": 240 }, { - "epoch": 4.88, - "learning_rate": 0.00016926020408163266, - "loss": 1.6566, + "epoch": 2.72, + "learning_rate": 0.00016576704545454547, + "loss": 1.82, "step": 241 }, { - "epoch": 4.9, - "learning_rate": 0.0001691326530612245, - "loss": 1.7141, + "epoch": 2.73, + "learning_rate": 0.000165625, + "loss": 1.7665, "step": 242 }, { - "epoch": 4.92, - "learning_rate": 0.00016900510204081635, - "loss": 1.5873, + "epoch": 2.75, + "learning_rate": 0.00016548295454545456, + "loss": 1.7638, "step": 243 }, { - "epoch": 4.94, - "learning_rate": 0.00016887755102040818, - "loss": 1.6571, + "epoch": 2.76, + "learning_rate": 0.0001653409090909091, + "loss": 1.7724, "step": 244 }, { - "epoch": 4.96, - "learning_rate": 0.00016875, - "loss": 1.6829, + "epoch": 2.77, + "learning_rate": 0.00016519886363636365, + "loss": 1.7917, "step": 245 }, { - "epoch": 4.98, - "learning_rate": 0.00016862244897959184, - "loss": 1.6935, + "epoch": 2.78, + "learning_rate": 0.0001650568181818182, + "loss": 1.8442, "step": 246 }, { - "epoch": 5.0, - "learning_rate": 0.00016849489795918367, - "loss": 1.6782, + "epoch": 2.79, + "learning_rate": 0.00016491477272727274, + "loss": 1.7887, "step": 247 }, { - "epoch": 5.02, - "learning_rate": 0.00016836734693877553, - "loss": 1.622, + "epoch": 2.8, + "learning_rate": 0.00016477272727272727, + "loss": 1.8055, "step": 248 }, { - "epoch": 5.04, - "learning_rate": 0.00016823979591836736, - "loss": 1.6596, + "epoch": 2.81, + "learning_rate": 0.00016463068181818183, + "loss": 1.7754, "step": 249 }, { - "epoch": 5.06, - "learning_rate": 0.0001681122448979592, - "loss": 1.5821, + "epoch": 2.82, + "learning_rate": 0.00016448863636363636, + "loss": 1.7948, "step": 250 }, { - "epoch": 5.08, - "learning_rate": 0.00016798469387755102, - "loss": 1.7292, + "epoch": 2.84, + "learning_rate": 0.00016434659090909092, + "loss": 1.8332, "step": 251 }, { - "epoch": 5.1, - "learning_rate": 0.00016785714285714288, - "loss": 1.646, + "epoch": 2.85, + "learning_rate": 0.00016420454545454548, + "loss": 1.772, "step": 252 }, { - "epoch": 5.12, - "learning_rate": 0.0001677295918367347, - "loss": 1.6969, + "epoch": 2.86, + "learning_rate": 0.0001640625, + "loss": 1.7781, "step": 253 }, { - "epoch": 5.14, - "learning_rate": 0.00016760204081632654, - "loss": 1.6082, + "epoch": 2.87, + "learning_rate": 0.00016392045454545454, + "loss": 1.7714, "step": 254 }, { - "epoch": 5.16, - "learning_rate": 0.00016747448979591837, - "loss": 1.5843, + "epoch": 2.88, + "learning_rate": 0.0001637784090909091, + "loss": 1.793, "step": 255 }, { - "epoch": 5.18, - "learning_rate": 0.00016734693877551023, - "loss": 1.6827, + "epoch": 2.89, + "learning_rate": 0.00016363636363636366, + "loss": 1.8038, "step": 256 }, { - "epoch": 5.2, - "learning_rate": 0.00016721938775510203, - "loss": 1.5824, + "epoch": 2.9, + "learning_rate": 0.0001634943181818182, + "loss": 1.8137, "step": 257 }, { - "epoch": 5.22, - "learning_rate": 0.0001670918367346939, - "loss": 1.6795, + "epoch": 2.91, + "learning_rate": 0.00016335227272727275, + "loss": 1.7726, "step": 258 }, { - "epoch": 5.24, - "learning_rate": 0.00016696428571428572, - "loss": 1.5639, + "epoch": 2.93, + "learning_rate": 0.00016321022727272728, + "loss": 1.7753, "step": 259 }, { - "epoch": 5.26, - "learning_rate": 0.00016683673469387755, - "loss": 1.592, + "epoch": 2.94, + "learning_rate": 0.0001630681818181818, + "loss": 1.7553, "step": 260 }, { - "epoch": 5.28, - "learning_rate": 0.00016670918367346938, - "loss": 1.65, + "epoch": 2.95, + "learning_rate": 0.00016292613636363637, + "loss": 1.7518, "step": 261 }, { - "epoch": 5.3, - "learning_rate": 0.00016658163265306124, - "loss": 1.5592, + "epoch": 2.96, + "learning_rate": 0.00016278409090909093, + "loss": 1.7724, "step": 262 }, { - "epoch": 5.32, - "learning_rate": 0.00016645408163265305, - "loss": 1.5091, + "epoch": 2.97, + "learning_rate": 0.00016264204545454546, + "loss": 1.7266, "step": 263 }, { - "epoch": 5.34, - "learning_rate": 0.0001663265306122449, - "loss": 1.6138, + "epoch": 2.98, + "learning_rate": 0.00016250000000000002, + "loss": 1.8032, "step": 264 }, { - "epoch": 5.36, - "learning_rate": 0.00016619897959183673, - "loss": 1.625, + "epoch": 2.99, + "learning_rate": 0.00016235795454545455, + "loss": 1.7345, "step": 265 }, { - "epoch": 5.38, - "learning_rate": 0.0001660714285714286, - "loss": 1.5757, + "epoch": 3.01, + "learning_rate": 0.00016221590909090908, + "loss": 1.7249, "step": 266 }, { - "epoch": 5.4, - "learning_rate": 0.00016594387755102042, - "loss": 1.6372, + "epoch": 3.02, + "learning_rate": 0.00016207386363636364, + "loss": 1.7218, "step": 267 }, { - "epoch": 5.42, - "learning_rate": 0.00016581632653061225, - "loss": 1.5891, + "epoch": 3.03, + "learning_rate": 0.0001619318181818182, + "loss": 1.7092, "step": 268 }, { - "epoch": 5.44, - "learning_rate": 0.00016568877551020409, - "loss": 1.6893, + "epoch": 3.04, + "learning_rate": 0.00016178977272727273, + "loss": 1.6807, "step": 269 }, { - "epoch": 5.46, - "learning_rate": 0.00016556122448979592, - "loss": 1.6662, + "epoch": 3.05, + "learning_rate": 0.0001616477272727273, + "loss": 1.7264, "step": 270 }, { - "epoch": 5.48, - "learning_rate": 0.00016543367346938777, - "loss": 1.7132, + "epoch": 3.06, + "learning_rate": 0.00016150568181818182, + "loss": 1.726, "step": 271 }, { - "epoch": 5.5, - "learning_rate": 0.0001653061224489796, - "loss": 1.5835, + "epoch": 3.07, + "learning_rate": 0.00016136363636363635, + "loss": 1.6986, "step": 272 }, { - "epoch": 5.52, - "learning_rate": 0.00016517857142857144, - "loss": 1.6342, + "epoch": 3.08, + "learning_rate": 0.0001612215909090909, + "loss": 1.68, "step": 273 }, { - "epoch": 5.54, - "learning_rate": 0.00016505102040816327, - "loss": 1.6717, + "epoch": 3.1, + "learning_rate": 0.00016107954545454547, + "loss": 1.6677, "step": 274 }, { - "epoch": 5.56, - "learning_rate": 0.00016492346938775512, - "loss": 1.6248, + "epoch": 3.11, + "learning_rate": 0.0001609375, + "loss": 1.7137, "step": 275 }, { - "epoch": 5.58, - "learning_rate": 0.00016479591836734696, - "loss": 1.6117, + "epoch": 3.12, + "learning_rate": 0.00016079545454545456, + "loss": 1.6671, "step": 276 }, { - "epoch": 5.6, - "learning_rate": 0.0001646683673469388, - "loss": 1.6798, + "epoch": 3.13, + "learning_rate": 0.0001606534090909091, + "loss": 1.6873, "step": 277 }, { - "epoch": 5.63, - "learning_rate": 0.00016454081632653062, - "loss": 1.6406, + "epoch": 3.14, + "learning_rate": 0.00016051136363636365, + "loss": 1.6694, "step": 278 }, { - "epoch": 5.65, - "learning_rate": 0.00016441326530612248, - "loss": 1.6512, + "epoch": 3.15, + "learning_rate": 0.00016036931818181818, + "loss": 1.7003, "step": 279 }, { - "epoch": 5.67, - "learning_rate": 0.00016428571428571428, - "loss": 1.6102, + "epoch": 3.16, + "learning_rate": 0.00016022727272727274, + "loss": 1.6861, "step": 280 }, { - "epoch": 5.69, - "learning_rate": 0.00016415816326530614, - "loss": 1.6113, + "epoch": 3.17, + "learning_rate": 0.00016008522727272727, + "loss": 1.6881, "step": 281 }, { - "epoch": 5.71, - "learning_rate": 0.00016403061224489797, - "loss": 1.7116, + "epoch": 3.19, + "learning_rate": 0.00015994318181818183, + "loss": 1.6848, "step": 282 }, { - "epoch": 5.73, - "learning_rate": 0.0001639030612244898, - "loss": 1.6846, + "epoch": 3.2, + "learning_rate": 0.00015980113636363636, + "loss": 1.6872, "step": 283 }, { - "epoch": 5.75, - "learning_rate": 0.00016377551020408163, - "loss": 1.6911, + "epoch": 3.21, + "learning_rate": 0.00015965909090909092, + "loss": 1.6975, "step": 284 }, { - "epoch": 5.77, - "learning_rate": 0.0001636479591836735, - "loss": 1.6202, + "epoch": 3.22, + "learning_rate": 0.00015951704545454545, + "loss": 1.6708, "step": 285 }, { - "epoch": 5.79, - "learning_rate": 0.0001635204081632653, - "loss": 1.5715, + "epoch": 3.23, + "learning_rate": 0.000159375, + "loss": 1.6985, "step": 286 }, { - "epoch": 5.81, - "learning_rate": 0.00016339285714285715, - "loss": 1.6461, + "epoch": 3.24, + "learning_rate": 0.00015923295454545454, + "loss": 1.6586, "step": 287 }, { - "epoch": 5.83, - "learning_rate": 0.00016326530612244898, - "loss": 1.6624, + "epoch": 3.25, + "learning_rate": 0.0001590909090909091, + "loss": 1.6707, "step": 288 }, { - "epoch": 5.85, - "learning_rate": 0.00016313775510204084, - "loss": 1.6535, + "epoch": 3.26, + "learning_rate": 0.00015894886363636366, + "loss": 1.6576, "step": 289 }, { - "epoch": 5.87, - "learning_rate": 0.00016301020408163267, - "loss": 1.6275, + "epoch": 3.28, + "learning_rate": 0.0001588068181818182, + "loss": 1.6625, "step": 290 }, { - "epoch": 5.89, - "learning_rate": 0.0001628826530612245, - "loss": 1.6636, + "epoch": 3.29, + "learning_rate": 0.00015866477272727275, + "loss": 1.677, "step": 291 }, { - "epoch": 5.91, - "learning_rate": 0.00016275510204081633, - "loss": 1.6546, + "epoch": 3.3, + "learning_rate": 0.00015852272727272728, + "loss": 1.6599, "step": 292 }, { - "epoch": 5.93, - "learning_rate": 0.00016262755102040816, - "loss": 1.7274, + "epoch": 3.31, + "learning_rate": 0.0001583806818181818, + "loss": 1.6674, "step": 293 }, { - "epoch": 5.95, - "learning_rate": 0.00016250000000000002, - "loss": 1.5901, + "epoch": 3.32, + "learning_rate": 0.00015823863636363637, + "loss": 1.6707, "step": 294 }, { - "epoch": 5.97, - "learning_rate": 0.00016237244897959185, - "loss": 1.6046, + "epoch": 3.33, + "learning_rate": 0.00015809659090909093, + "loss": 1.6788, "step": 295 }, { - "epoch": 5.99, - "learning_rate": 0.00016224489795918368, - "loss": 1.5828, + "epoch": 3.34, + "learning_rate": 0.00015795454545454546, + "loss": 1.6686, "step": 296 }, { - "epoch": 6.01, - "learning_rate": 0.0001621173469387755, - "loss": 1.6435, + "epoch": 3.36, + "learning_rate": 0.00015781250000000002, + "loss": 1.6488, "step": 297 }, { - "epoch": 6.03, - "learning_rate": 0.00016198979591836737, - "loss": 1.6263, + "epoch": 3.37, + "learning_rate": 0.00015767045454545455, + "loss": 1.6806, "step": 298 }, { - "epoch": 6.05, - "learning_rate": 0.00016186224489795917, - "loss": 1.4944, + "epoch": 3.38, + "learning_rate": 0.00015752840909090908, + "loss": 1.6862, "step": 299 }, { - "epoch": 6.07, - "learning_rate": 0.00016173469387755103, - "loss": 1.6286, + "epoch": 3.39, + "learning_rate": 0.00015738636363636364, + "loss": 1.6499, "step": 300 }, { - "epoch": 6.09, - "learning_rate": 0.00016160714285714286, - "loss": 1.694, + "epoch": 3.4, + "learning_rate": 0.0001572443181818182, + "loss": 1.6245, "step": 301 }, { - "epoch": 6.11, - "learning_rate": 0.00016147959183673472, - "loss": 1.6197, + "epoch": 3.41, + "learning_rate": 0.00015710227272727273, + "loss": 1.6268, "step": 302 }, { - "epoch": 6.13, - "learning_rate": 0.00016135204081632652, - "loss": 1.5597, + "epoch": 3.42, + "learning_rate": 0.0001569602272727273, + "loss": 1.6438, "step": 303 }, { - "epoch": 6.15, - "learning_rate": 0.00016122448979591838, - "loss": 1.5487, + "epoch": 3.43, + "learning_rate": 0.00015681818181818182, + "loss": 1.6681, "step": 304 }, { - "epoch": 6.17, - "learning_rate": 0.0001610969387755102, - "loss": 1.5769, + "epoch": 3.45, + "learning_rate": 0.00015667613636363635, + "loss": 1.6582, "step": 305 }, { - "epoch": 6.19, - "learning_rate": 0.00016096938775510204, - "loss": 1.6367, + "epoch": 3.46, + "learning_rate": 0.0001565340909090909, + "loss": 1.6432, "step": 306 }, { - "epoch": 6.21, - "learning_rate": 0.00016084183673469388, - "loss": 1.583, + "epoch": 3.47, + "learning_rate": 0.00015639204545454547, + "loss": 1.617, "step": 307 }, { - "epoch": 6.23, - "learning_rate": 0.00016071428571428573, - "loss": 1.6201, + "epoch": 3.48, + "learning_rate": 0.00015625, + "loss": 1.6569, "step": 308 }, { - "epoch": 6.25, - "learning_rate": 0.00016058673469387754, - "loss": 1.6586, + "epoch": 3.49, + "learning_rate": 0.00015610795454545456, + "loss": 1.6276, "step": 309 }, { - "epoch": 6.27, - "learning_rate": 0.0001604591836734694, - "loss": 1.6711, + "epoch": 3.5, + "learning_rate": 0.0001559659090909091, + "loss": 1.6432, "step": 310 }, { - "epoch": 6.29, - "learning_rate": 0.00016033163265306123, - "loss": 1.6402, + "epoch": 3.51, + "learning_rate": 0.00015582386363636365, + "loss": 1.6132, "step": 311 }, { - "epoch": 6.31, - "learning_rate": 0.00016020408163265306, - "loss": 1.5247, + "epoch": 3.52, + "learning_rate": 0.00015568181818181818, + "loss": 1.5997, "step": 312 }, { - "epoch": 6.33, - "learning_rate": 0.00016007653061224491, - "loss": 1.5356, + "epoch": 3.54, + "learning_rate": 0.00015553977272727274, + "loss": 1.6154, "step": 313 }, { - "epoch": 6.35, - "learning_rate": 0.00015994897959183675, - "loss": 1.564, + "epoch": 3.55, + "learning_rate": 0.00015539772727272727, + "loss": 1.5862, "step": 314 }, { - "epoch": 6.37, - "learning_rate": 0.00015982142857142858, - "loss": 1.563, + "epoch": 3.56, + "learning_rate": 0.00015525568181818183, + "loss": 1.6233, "step": 315 }, { - "epoch": 6.39, - "learning_rate": 0.0001596938775510204, - "loss": 1.5198, + "epoch": 3.57, + "learning_rate": 0.00015511363636363636, + "loss": 1.6265, "step": 316 }, { - "epoch": 6.41, - "learning_rate": 0.00015956632653061227, - "loss": 1.6558, + "epoch": 3.58, + "learning_rate": 0.00015497159090909092, + "loss": 1.6171, "step": 317 }, { - "epoch": 6.43, - "learning_rate": 0.0001594387755102041, - "loss": 1.5534, + "epoch": 3.59, + "learning_rate": 0.00015482954545454545, + "loss": 1.6303, "step": 318 }, { - "epoch": 6.45, - "learning_rate": 0.00015931122448979593, - "loss": 1.6239, + "epoch": 3.6, + "learning_rate": 0.0001546875, + "loss": 1.6272, "step": 319 }, { - "epoch": 6.47, - "learning_rate": 0.00015918367346938776, - "loss": 1.5645, + "epoch": 3.62, + "learning_rate": 0.00015454545454545454, + "loss": 1.6183, "step": 320 }, { - "epoch": 6.5, - "learning_rate": 0.00015905612244897962, - "loss": 1.5713, + "epoch": 3.63, + "learning_rate": 0.0001544034090909091, + "loss": 1.6205, "step": 321 }, { - "epoch": 6.52, - "learning_rate": 0.00015892857142857142, - "loss": 1.6176, + "epoch": 3.64, + "learning_rate": 0.00015426136363636366, + "loss": 1.6099, "step": 322 }, { - "epoch": 6.54, - "learning_rate": 0.00015880102040816328, - "loss": 1.502, + "epoch": 3.65, + "learning_rate": 0.0001541193181818182, + "loss": 1.5973, "step": 323 }, { - "epoch": 6.56, - "learning_rate": 0.0001586734693877551, - "loss": 1.645, + "epoch": 3.66, + "learning_rate": 0.00015397727272727272, + "loss": 1.6247, "step": 324 }, { - "epoch": 6.58, - "learning_rate": 0.00015854591836734697, - "loss": 1.5904, + "epoch": 3.67, + "learning_rate": 0.00015383522727272728, + "loss": 1.6041, "step": 325 }, { - "epoch": 6.6, - "learning_rate": 0.00015841836734693877, - "loss": 1.6149, + "epoch": 3.68, + "learning_rate": 0.00015369318181818181, + "loss": 1.5835, "step": 326 }, { - "epoch": 6.62, - "learning_rate": 0.00015829081632653063, - "loss": 1.6757, + "epoch": 3.69, + "learning_rate": 0.00015355113636363637, + "loss": 1.608, "step": 327 }, { - "epoch": 6.64, - "learning_rate": 0.00015816326530612246, - "loss": 1.541, + "epoch": 3.71, + "learning_rate": 0.00015340909090909093, + "loss": 1.6155, "step": 328 }, { - "epoch": 6.66, - "learning_rate": 0.0001580357142857143, - "loss": 1.5898, + "epoch": 3.72, + "learning_rate": 0.00015326704545454546, + "loss": 1.5777, "step": 329 }, { - "epoch": 6.68, - "learning_rate": 0.00015790816326530612, - "loss": 1.5441, + "epoch": 3.73, + "learning_rate": 0.000153125, + "loss": 1.5969, "step": 330 }, { - "epoch": 6.7, - "learning_rate": 0.00015778061224489798, - "loss": 1.61, + "epoch": 3.74, + "learning_rate": 0.00015298295454545455, + "loss": 1.5904, "step": 331 }, { - "epoch": 6.72, - "learning_rate": 0.00015765306122448978, - "loss": 1.615, + "epoch": 3.75, + "learning_rate": 0.00015284090909090909, + "loss": 1.586, "step": 332 }, { - "epoch": 6.74, - "learning_rate": 0.00015752551020408164, - "loss": 1.6575, + "epoch": 3.76, + "learning_rate": 0.00015269886363636364, + "loss": 1.582, "step": 333 }, { - "epoch": 6.76, - "learning_rate": 0.00015739795918367347, - "loss": 1.6702, + "epoch": 3.77, + "learning_rate": 0.0001525568181818182, + "loss": 1.548, "step": 334 }, { - "epoch": 6.78, - "learning_rate": 0.0001572704081632653, - "loss": 1.6009, + "epoch": 3.78, + "learning_rate": 0.00015241477272727273, + "loss": 1.5564, "step": 335 }, { - "epoch": 6.8, - "learning_rate": 0.00015714285714285716, - "loss": 1.5568, + "epoch": 3.8, + "learning_rate": 0.00015227272727272727, + "loss": 1.5506, "step": 336 }, { - "epoch": 6.82, - "learning_rate": 0.000157015306122449, - "loss": 1.619, + "epoch": 3.81, + "learning_rate": 0.00015213068181818182, + "loss": 1.5526, "step": 337 }, { - "epoch": 6.84, - "learning_rate": 0.00015688775510204082, - "loss": 1.5563, + "epoch": 3.82, + "learning_rate": 0.00015198863636363636, + "loss": 1.5564, "step": 338 }, { - "epoch": 6.86, - "learning_rate": 0.00015676020408163265, - "loss": 1.6328, + "epoch": 3.83, + "learning_rate": 0.00015184659090909091, + "loss": 1.5598, "step": 339 }, { - "epoch": 6.88, - "learning_rate": 0.0001566326530612245, - "loss": 1.5726, + "epoch": 3.84, + "learning_rate": 0.00015170454545454547, + "loss": 1.5679, "step": 340 }, { - "epoch": 6.9, - "learning_rate": 0.00015650510204081634, - "loss": 1.6199, + "epoch": 3.85, + "learning_rate": 0.0001515625, + "loss": 1.549, "step": 341 }, { - "epoch": 6.92, - "learning_rate": 0.00015637755102040817, - "loss": 1.5722, + "epoch": 3.86, + "learning_rate": 0.00015142045454545454, + "loss": 1.5672, "step": 342 }, { - "epoch": 6.94, - "learning_rate": 0.00015625, - "loss": 1.5685, + "epoch": 3.88, + "learning_rate": 0.0001512784090909091, + "loss": 1.5399, "step": 343 }, { - "epoch": 6.96, - "learning_rate": 0.00015612244897959186, - "loss": 1.5615, + "epoch": 3.89, + "learning_rate": 0.00015113636363636365, + "loss": 1.5576, "step": 344 }, { - "epoch": 6.98, - "learning_rate": 0.00015599489795918366, - "loss": 1.5994, + "epoch": 3.9, + "learning_rate": 0.00015099431818181818, + "loss": 1.549, "step": 345 }, { - "epoch": 7.0, - "learning_rate": 0.00015586734693877552, - "loss": 1.5579, + "epoch": 3.91, + "learning_rate": 0.00015085227272727274, + "loss": 1.5345, "step": 346 }, { - "epoch": 7.02, - "learning_rate": 0.00015573979591836735, - "loss": 1.547, + "epoch": 3.92, + "learning_rate": 0.00015071022727272728, + "loss": 1.5015, "step": 347 }, { - "epoch": 7.04, - "learning_rate": 0.00015561224489795918, - "loss": 1.5292, + "epoch": 3.93, + "learning_rate": 0.0001505681818181818, + "loss": 1.5221, "step": 348 }, { - "epoch": 7.06, - "learning_rate": 0.00015548469387755102, - "loss": 1.6032, + "epoch": 3.94, + "learning_rate": 0.00015042613636363637, + "loss": 1.556, "step": 349 }, { - "epoch": 7.08, - "learning_rate": 0.00015535714285714287, - "loss": 1.5149, + "epoch": 3.95, + "learning_rate": 0.00015028409090909092, + "loss": 1.5276, "step": 350 }, { - "epoch": 7.1, - "learning_rate": 0.0001552295918367347, - "loss": 1.6093, + "epoch": 3.97, + "learning_rate": 0.00015014204545454546, + "loss": 1.552, "step": 351 }, { - "epoch": 7.12, - "learning_rate": 0.00015510204081632654, - "loss": 1.5421, + "epoch": 3.98, + "learning_rate": 0.00015000000000000001, + "loss": 1.5377, "step": 352 }, { - "epoch": 7.14, - "learning_rate": 0.00015497448979591837, - "loss": 1.5733, + "epoch": 3.99, + "learning_rate": 0.00014985795454545455, + "loss": 1.5576, "step": 353 }, { - "epoch": 7.16, - "learning_rate": 0.00015484693877551022, - "loss": 1.5703, + "epoch": 4.0, + "learning_rate": 0.00014971590909090908, + "loss": 1.5295, "step": 354 }, { - "epoch": 7.18, - "learning_rate": 0.00015471938775510203, - "loss": 1.6141, + "epoch": 4.01, + "learning_rate": 0.00014957386363636366, + "loss": 1.4842, "step": 355 }, { - "epoch": 7.2, - "learning_rate": 0.00015459183673469389, - "loss": 1.5526, + "epoch": 4.02, + "learning_rate": 0.0001494318181818182, + "loss": 1.4803, "step": 356 }, { - "epoch": 7.22, - "learning_rate": 0.00015446428571428572, - "loss": 1.5347, + "epoch": 4.03, + "learning_rate": 0.00014928977272727273, + "loss": 1.4559, "step": 357 }, { - "epoch": 7.24, - "learning_rate": 0.00015433673469387755, - "loss": 1.5682, + "epoch": 4.04, + "learning_rate": 0.00014914772727272728, + "loss": 1.4777, "step": 358 }, { - "epoch": 7.26, - "learning_rate": 0.0001542091836734694, - "loss": 1.5292, + "epoch": 4.06, + "learning_rate": 0.00014900568181818182, + "loss": 1.4343, "step": 359 }, { - "epoch": 7.28, - "learning_rate": 0.00015408163265306124, - "loss": 1.499, + "epoch": 4.07, + "learning_rate": 0.00014886363636363635, + "loss": 1.4699, "step": 360 }, { - "epoch": 7.3, - "learning_rate": 0.00015395408163265307, - "loss": 1.5624, + "epoch": 4.08, + "learning_rate": 0.00014872159090909093, + "loss": 1.4452, "step": 361 }, { - "epoch": 7.32, - "learning_rate": 0.0001538265306122449, - "loss": 1.627, + "epoch": 4.09, + "learning_rate": 0.00014857954545454546, + "loss": 1.4461, "step": 362 }, { - "epoch": 7.34, - "learning_rate": 0.00015369897959183676, - "loss": 1.5327, + "epoch": 4.1, + "learning_rate": 0.0001484375, + "loss": 1.4523, "step": 363 }, { - "epoch": 7.37, - "learning_rate": 0.0001535714285714286, - "loss": 1.5622, + "epoch": 4.11, + "learning_rate": 0.00014829545454545455, + "loss": 1.4425, "step": 364 }, { - "epoch": 7.39, - "learning_rate": 0.00015344387755102042, - "loss": 1.5659, + "epoch": 4.12, + "learning_rate": 0.0001481534090909091, + "loss": 1.4559, "step": 365 }, { - "epoch": 7.41, - "learning_rate": 0.00015331632653061225, - "loss": 1.5019, + "epoch": 4.13, + "learning_rate": 0.00014801136363636365, + "loss": 1.4193, "step": 366 }, { - "epoch": 7.43, - "learning_rate": 0.0001531887755102041, - "loss": 1.5921, + "epoch": 4.15, + "learning_rate": 0.0001478693181818182, + "loss": 1.4136, "step": 367 }, { - "epoch": 7.45, - "learning_rate": 0.0001530612244897959, - "loss": 1.5914, + "epoch": 4.16, + "learning_rate": 0.00014772727272727274, + "loss": 1.445, "step": 368 }, { - "epoch": 7.47, - "learning_rate": 0.00015293367346938777, - "loss": 1.5045, + "epoch": 4.17, + "learning_rate": 0.00014758522727272727, + "loss": 1.4304, "step": 369 }, { - "epoch": 7.49, - "learning_rate": 0.0001528061224489796, - "loss": 1.6209, + "epoch": 4.18, + "learning_rate": 0.00014744318181818183, + "loss": 1.3996, "step": 370 }, { - "epoch": 7.51, - "learning_rate": 0.00015267857142857143, - "loss": 1.5198, + "epoch": 4.19, + "learning_rate": 0.00014730113636363636, + "loss": 1.4247, "step": 371 }, { - "epoch": 7.53, - "learning_rate": 0.00015255102040816326, - "loss": 1.5363, + "epoch": 4.2, + "learning_rate": 0.00014715909090909092, + "loss": 1.4303, "step": 372 }, { - "epoch": 7.55, - "learning_rate": 0.00015242346938775512, - "loss": 1.5391, + "epoch": 4.21, + "learning_rate": 0.00014701704545454547, + "loss": 1.4219, "step": 373 }, { - "epoch": 7.57, - "learning_rate": 0.00015229591836734695, - "loss": 1.4546, + "epoch": 4.23, + "learning_rate": 0.000146875, + "loss": 1.4538, "step": 374 }, { - "epoch": 7.59, - "learning_rate": 0.00015216836734693878, - "loss": 1.5546, + "epoch": 4.24, + "learning_rate": 0.00014673295454545454, + "loss": 1.4391, "step": 375 }, { - "epoch": 7.61, - "learning_rate": 0.0001520408163265306, - "loss": 1.5629, + "epoch": 4.25, + "learning_rate": 0.0001465909090909091, + "loss": 1.4482, "step": 376 }, { - "epoch": 7.63, - "learning_rate": 0.00015191326530612247, - "loss": 1.6002, + "epoch": 4.26, + "learning_rate": 0.00014644886363636365, + "loss": 1.4208, "step": 377 }, { - "epoch": 7.65, - "learning_rate": 0.00015178571428571427, - "loss": 1.5543, + "epoch": 4.27, + "learning_rate": 0.00014630681818181819, + "loss": 1.4111, "step": 378 }, { - "epoch": 7.67, - "learning_rate": 0.00015165816326530613, - "loss": 1.5925, + "epoch": 4.28, + "learning_rate": 0.00014616477272727274, + "loss": 1.4318, "step": 379 }, { - "epoch": 7.69, - "learning_rate": 0.00015153061224489796, - "loss": 1.5631, + "epoch": 4.29, + "learning_rate": 0.00014602272727272728, + "loss": 1.3913, "step": 380 }, { - "epoch": 7.71, - "learning_rate": 0.0001514030612244898, - "loss": 1.5677, + "epoch": 4.3, + "learning_rate": 0.0001458806818181818, + "loss": 1.3847, "step": 381 }, { - "epoch": 7.73, - "learning_rate": 0.00015127551020408165, - "loss": 1.5828, + "epoch": 4.32, + "learning_rate": 0.00014573863636363637, + "loss": 1.4254, "step": 382 }, { - "epoch": 7.75, - "learning_rate": 0.00015114795918367348, - "loss": 1.6494, + "epoch": 4.33, + "learning_rate": 0.00014559659090909093, + "loss": 1.4143, "step": 383 }, { - "epoch": 7.77, - "learning_rate": 0.0001510204081632653, - "loss": 1.553, + "epoch": 4.34, + "learning_rate": 0.00014545454545454546, + "loss": 1.4362, "step": 384 }, { - "epoch": 7.79, - "learning_rate": 0.00015089285714285714, - "loss": 1.6156, + "epoch": 4.35, + "learning_rate": 0.00014531250000000002, + "loss": 1.386, "step": 385 }, { - "epoch": 7.81, - "learning_rate": 0.000150765306122449, - "loss": 1.5001, + "epoch": 4.36, + "learning_rate": 0.00014517045454545455, + "loss": 1.4009, "step": 386 }, { - "epoch": 7.83, - "learning_rate": 0.00015063775510204083, - "loss": 1.5321, + "epoch": 4.37, + "learning_rate": 0.00014502840909090908, + "loss": 1.4089, "step": 387 }, { - "epoch": 7.85, - "learning_rate": 0.00015051020408163266, - "loss": 1.5307, + "epoch": 4.38, + "learning_rate": 0.00014488636363636366, + "loss": 1.4117, "step": 388 }, { - "epoch": 7.87, - "learning_rate": 0.0001503826530612245, - "loss": 1.5639, + "epoch": 4.39, + "learning_rate": 0.0001447443181818182, + "loss": 1.3788, "step": 389 }, { - "epoch": 7.89, - "learning_rate": 0.00015025510204081635, - "loss": 1.517, + "epoch": 4.41, + "learning_rate": 0.00014460227272727273, + "loss": 1.3573, "step": 390 }, { - "epoch": 7.91, - "learning_rate": 0.00015012755102040816, - "loss": 1.4776, + "epoch": 4.42, + "learning_rate": 0.00014446022727272729, + "loss": 1.4133, "step": 391 }, { - "epoch": 7.93, - "learning_rate": 0.00015000000000000001, - "loss": 1.5368, + "epoch": 4.43, + "learning_rate": 0.00014431818181818182, + "loss": 1.3866, "step": 392 }, { - "epoch": 7.95, - "learning_rate": 0.00014987244897959184, - "loss": 1.5636, + "epoch": 4.44, + "learning_rate": 0.00014417613636363635, + "loss": 1.3883, "step": 393 }, { - "epoch": 7.97, - "learning_rate": 0.00014974489795918368, - "loss": 1.6004, + "epoch": 4.45, + "learning_rate": 0.00014403409090909093, + "loss": 1.3741, "step": 394 }, { - "epoch": 7.99, - "learning_rate": 0.0001496173469387755, - "loss": 1.5524, + "epoch": 4.46, + "learning_rate": 0.00014389204545454547, + "loss": 1.358, "step": 395 }, { - "epoch": 8.01, - "learning_rate": 0.00014948979591836736, - "loss": 1.5307, + "epoch": 4.47, + "learning_rate": 0.00014375, + "loss": 1.3893, "step": 396 }, { - "epoch": 8.03, - "learning_rate": 0.00014936224489795917, - "loss": 1.5123, + "epoch": 4.49, + "learning_rate": 0.00014360795454545456, + "loss": 1.4062, "step": 397 }, { - "epoch": 8.05, - "learning_rate": 0.00014923469387755103, - "loss": 1.5132, + "epoch": 4.5, + "learning_rate": 0.0001434659090909091, + "loss": 1.3795, "step": 398 }, { - "epoch": 8.07, - "learning_rate": 0.00014910714285714286, - "loss": 1.5109, + "epoch": 4.51, + "learning_rate": 0.00014332386363636365, + "loss": 1.3472, "step": 399 }, { - "epoch": 8.09, - "learning_rate": 0.00014897959183673472, - "loss": 1.5302, + "epoch": 4.52, + "learning_rate": 0.0001431818181818182, + "loss": 1.3408, "step": 400 }, { - "epoch": 8.11, - "learning_rate": 0.00014885204081632652, - "loss": 1.5238, + "epoch": 4.53, + "learning_rate": 0.00014303977272727274, + "loss": 1.3801, "step": 401 }, { - "epoch": 8.13, - "learning_rate": 0.00014872448979591838, - "loss": 1.4781, + "epoch": 4.54, + "learning_rate": 0.00014289772727272727, + "loss": 1.3709, "step": 402 }, { - "epoch": 8.15, - "learning_rate": 0.0001485969387755102, - "loss": 1.5446, + "epoch": 4.55, + "learning_rate": 0.00014275568181818183, + "loss": 1.3653, "step": 403 }, { - "epoch": 8.17, - "learning_rate": 0.00014846938775510204, - "loss": 1.5, + "epoch": 4.56, + "learning_rate": 0.00014261363636363636, + "loss": 1.4089, "step": 404 }, { - "epoch": 8.19, - "learning_rate": 0.0001483418367346939, - "loss": 1.5458, + "epoch": 4.58, + "learning_rate": 0.00014247159090909092, + "loss": 1.3281, "step": 405 }, { - "epoch": 8.21, - "learning_rate": 0.00014821428571428573, - "loss": 1.5257, + "epoch": 4.59, + "learning_rate": 0.00014232954545454548, + "loss": 1.328, "step": 406 }, { - "epoch": 8.24, - "learning_rate": 0.00014808673469387756, - "loss": 1.4607, + "epoch": 4.6, + "learning_rate": 0.0001421875, + "loss": 1.3458, "step": 407 }, { - "epoch": 8.26, - "learning_rate": 0.0001479591836734694, - "loss": 1.4282, + "epoch": 4.61, + "learning_rate": 0.00014204545454545454, + "loss": 1.3425, "step": 408 }, { - "epoch": 8.28, - "learning_rate": 0.00014783163265306125, - "loss": 1.4519, + "epoch": 4.62, + "learning_rate": 0.0001419034090909091, + "loss": 1.3236, "step": 409 }, { - "epoch": 8.3, - "learning_rate": 0.00014770408163265305, - "loss": 1.475, + "epoch": 4.63, + "learning_rate": 0.00014176136363636366, + "loss": 1.3439, "step": 410 }, { - "epoch": 8.32, - "learning_rate": 0.0001475765306122449, - "loss": 1.5425, + "epoch": 4.64, + "learning_rate": 0.0001416193181818182, + "loss": 1.3397, "step": 411 }, { - "epoch": 8.34, - "learning_rate": 0.00014744897959183674, - "loss": 1.5407, + "epoch": 4.65, + "learning_rate": 0.00014147727272727275, + "loss": 1.329, "step": 412 }, { - "epoch": 8.36, - "learning_rate": 0.0001473214285714286, - "loss": 1.5698, + "epoch": 4.67, + "learning_rate": 0.00014133522727272728, + "loss": 1.3377, "step": 413 }, { - "epoch": 8.38, - "learning_rate": 0.0001471938775510204, - "loss": 1.4282, + "epoch": 4.68, + "learning_rate": 0.0001411931818181818, + "loss": 1.343, "step": 414 }, { - "epoch": 8.4, - "learning_rate": 0.00014706632653061226, - "loss": 1.5301, + "epoch": 4.69, + "learning_rate": 0.00014105113636363637, + "loss": 1.3185, "step": 415 }, { - "epoch": 8.42, - "learning_rate": 0.0001469387755102041, - "loss": 1.5083, + "epoch": 4.7, + "learning_rate": 0.00014090909090909093, + "loss": 1.3174, "step": 416 }, { - "epoch": 8.44, - "learning_rate": 0.00014681122448979592, - "loss": 1.5712, + "epoch": 4.71, + "learning_rate": 0.00014076704545454546, + "loss": 1.3231, "step": 417 }, { - "epoch": 8.46, - "learning_rate": 0.00014668367346938775, - "loss": 1.4363, + "epoch": 4.72, + "learning_rate": 0.00014062500000000002, + "loss": 1.3407, "step": 418 }, { - "epoch": 8.48, - "learning_rate": 0.0001465561224489796, - "loss": 1.4463, + "epoch": 4.73, + "learning_rate": 0.00014048295454545455, + "loss": 1.3138, "step": 419 }, { - "epoch": 8.5, - "learning_rate": 0.00014642857142857141, - "loss": 1.4738, + "epoch": 4.74, + "learning_rate": 0.00014034090909090908, + "loss": 1.3134, "step": 420 }, { - "epoch": 8.52, - "learning_rate": 0.00014630102040816327, - "loss": 1.5396, + "epoch": 4.76, + "learning_rate": 0.00014019886363636367, + "loss": 1.3187, "step": 421 }, { - "epoch": 8.54, - "learning_rate": 0.0001461734693877551, - "loss": 1.4384, + "epoch": 4.77, + "learning_rate": 0.0001400568181818182, + "loss": 1.2781, "step": 422 }, { - "epoch": 8.56, - "learning_rate": 0.00014604591836734696, - "loss": 1.5345, + "epoch": 4.78, + "learning_rate": 0.00013991477272727273, + "loss": 1.3254, "step": 423 }, { - "epoch": 8.58, - "learning_rate": 0.0001459183673469388, - "loss": 1.5355, + "epoch": 4.79, + "learning_rate": 0.0001397727272727273, + "loss": 1.2929, "step": 424 }, { - "epoch": 8.6, - "learning_rate": 0.00014579081632653062, - "loss": 1.5188, + "epoch": 4.8, + "learning_rate": 0.00013963068181818182, + "loss": 1.2953, "step": 425 }, { - "epoch": 8.62, - "learning_rate": 0.00014566326530612245, - "loss": 1.5575, + "epoch": 4.81, + "learning_rate": 0.00013948863636363635, + "loss": 1.3202, "step": 426 }, { - "epoch": 8.64, - "learning_rate": 0.00014553571428571428, - "loss": 1.5279, + "epoch": 4.82, + "learning_rate": 0.00013934659090909094, + "loss": 1.3118, "step": 427 }, { - "epoch": 8.66, - "learning_rate": 0.00014540816326530614, - "loss": 1.5484, + "epoch": 4.84, + "learning_rate": 0.00013920454545454547, + "loss": 1.3046, "step": 428 }, { - "epoch": 8.68, - "learning_rate": 0.00014528061224489797, - "loss": 1.4878, + "epoch": 4.85, + "learning_rate": 0.0001390625, + "loss": 1.2708, "step": 429 }, { - "epoch": 8.7, - "learning_rate": 0.0001451530612244898, - "loss": 1.503, + "epoch": 4.86, + "learning_rate": 0.00013892045454545456, + "loss": 1.2835, "step": 430 }, { - "epoch": 8.72, - "learning_rate": 0.00014502551020408163, - "loss": 1.4723, + "epoch": 4.87, + "learning_rate": 0.0001387784090909091, + "loss": 1.2728, "step": 431 }, { - "epoch": 8.74, - "learning_rate": 0.0001448979591836735, - "loss": 1.5579, + "epoch": 4.88, + "learning_rate": 0.00013863636363636365, + "loss": 1.3107, "step": 432 }, { - "epoch": 8.76, - "learning_rate": 0.0001447704081632653, - "loss": 1.4789, + "epoch": 4.89, + "learning_rate": 0.0001384943181818182, + "loss": 1.2615, "step": 433 }, { - "epoch": 8.78, - "learning_rate": 0.00014464285714285715, - "loss": 1.5501, + "epoch": 4.9, + "learning_rate": 0.00013835227272727274, + "loss": 1.2754, "step": 434 }, { - "epoch": 8.8, - "learning_rate": 0.00014451530612244899, - "loss": 1.5204, + "epoch": 4.91, + "learning_rate": 0.00013821022727272727, + "loss": 1.3018, "step": 435 }, { - "epoch": 8.82, - "learning_rate": 0.00014438775510204084, - "loss": 1.5489, + "epoch": 4.93, + "learning_rate": 0.00013806818181818183, + "loss": 1.2878, "step": 436 }, { - "epoch": 8.84, - "learning_rate": 0.00014426020408163265, - "loss": 1.5464, + "epoch": 4.94, + "learning_rate": 0.00013792613636363636, + "loss": 1.2595, "step": 437 }, { - "epoch": 8.86, - "learning_rate": 0.0001441326530612245, - "loss": 1.5896, + "epoch": 4.95, + "learning_rate": 0.00013778409090909092, + "loss": 1.2688, "step": 438 }, { - "epoch": 8.88, - "learning_rate": 0.00014400510204081634, - "loss": 1.5465, + "epoch": 4.96, + "learning_rate": 0.00013764204545454548, + "loss": 1.2669, "step": 439 }, { - "epoch": 8.9, - "learning_rate": 0.00014387755102040817, - "loss": 1.5094, + "epoch": 4.97, + "learning_rate": 0.0001375, + "loss": 1.2861, "step": 440 }, { - "epoch": 8.92, - "learning_rate": 0.00014375, - "loss": 1.5144, + "epoch": 4.98, + "learning_rate": 0.00013735795454545454, + "loss": 1.2536, "step": 441 }, { - "epoch": 8.94, - "learning_rate": 0.00014362244897959186, - "loss": 1.4919, + "epoch": 4.99, + "learning_rate": 0.0001372159090909091, + "loss": 1.2584, "step": 442 }, { - "epoch": 8.96, - "learning_rate": 0.00014349489795918366, - "loss": 1.4702, + "epoch": 5.0, + "learning_rate": 0.00013707386363636366, + "loss": 1.2203, "step": 443 }, { - "epoch": 8.98, - "learning_rate": 0.00014336734693877552, - "loss": 1.4996, + "epoch": 5.02, + "learning_rate": 0.0001369318181818182, + "loss": 1.1796, "step": 444 }, { - "epoch": 9.0, - "learning_rate": 0.00014323979591836735, - "loss": 1.5503, + "epoch": 5.03, + "learning_rate": 0.00013678977272727275, + "loss": 1.1856, "step": 445 }, { - "epoch": 9.02, - "learning_rate": 0.00014311224489795918, - "loss": 1.4125, + "epoch": 5.04, + "learning_rate": 0.00013664772727272728, + "loss": 1.1801, "step": 446 }, { - "epoch": 9.04, - "learning_rate": 0.00014298469387755104, - "loss": 1.4722, + "epoch": 5.05, + "learning_rate": 0.0001365056818181818, + "loss": 1.1761, "step": 447 }, { - "epoch": 9.06, - "learning_rate": 0.00014285714285714287, - "loss": 1.5199, + "epoch": 5.06, + "learning_rate": 0.00013636363636363637, + "loss": 1.1495, "step": 448 }, { - "epoch": 9.09, - "learning_rate": 0.0001427295918367347, - "loss": 1.4571, + "epoch": 5.07, + "learning_rate": 0.00013622159090909093, + "loss": 1.1903, "step": 449 }, { - "epoch": 9.11, - "learning_rate": 0.00014260204081632653, - "loss": 1.4996, + "epoch": 5.08, + "learning_rate": 0.00013607954545454546, + "loss": 1.1778, "step": 450 }, { - "epoch": 9.13, - "learning_rate": 0.0001424744897959184, - "loss": 1.4092, + "epoch": 5.1, + "learning_rate": 0.00013593750000000002, + "loss": 1.1902, "step": 451 }, { - "epoch": 9.15, - "learning_rate": 0.00014234693877551022, - "loss": 1.4198, + "epoch": 5.11, + "learning_rate": 0.00013579545454545455, + "loss": 1.1597, "step": 452 }, { - "epoch": 9.17, - "learning_rate": 0.00014221938775510205, - "loss": 1.4916, + "epoch": 5.12, + "learning_rate": 0.00013565340909090908, + "loss": 1.1529, "step": 453 }, { - "epoch": 9.19, - "learning_rate": 0.00014209183673469388, - "loss": 1.5051, + "epoch": 5.13, + "learning_rate": 0.00013551136363636364, + "loss": 1.1627, "step": 454 }, { - "epoch": 9.21, - "learning_rate": 0.00014196428571428574, - "loss": 1.4321, + "epoch": 5.14, + "learning_rate": 0.0001353693181818182, + "loss": 1.1613, "step": 455 }, { - "epoch": 9.23, - "learning_rate": 0.00014183673469387754, - "loss": 1.4097, + "epoch": 5.15, + "learning_rate": 0.00013522727272727273, + "loss": 1.1336, "step": 456 }, { - "epoch": 9.25, - "learning_rate": 0.0001417091836734694, - "loss": 1.4853, + "epoch": 5.16, + "learning_rate": 0.0001350852272727273, + "loss": 1.1369, "step": 457 }, { - "epoch": 9.27, - "learning_rate": 0.00014158163265306123, - "loss": 1.4593, + "epoch": 5.17, + "learning_rate": 0.00013494318181818182, + "loss": 1.1592, "step": 458 }, { - "epoch": 9.29, - "learning_rate": 0.00014145408163265306, - "loss": 1.3729, + "epoch": 5.19, + "learning_rate": 0.00013480113636363635, + "loss": 1.1482, "step": 459 }, { - "epoch": 9.31, - "learning_rate": 0.0001413265306122449, - "loss": 1.4467, + "epoch": 5.2, + "learning_rate": 0.00013465909090909094, + "loss": 1.1857, "step": 460 }, { - "epoch": 9.33, - "learning_rate": 0.00014119897959183675, - "loss": 1.4467, + "epoch": 5.21, + "learning_rate": 0.00013451704545454547, + "loss": 1.1651, "step": 461 }, { - "epoch": 9.35, - "learning_rate": 0.00014107142857142858, - "loss": 1.4785, + "epoch": 5.22, + "learning_rate": 0.000134375, + "loss": 1.1544, "step": 462 }, { - "epoch": 9.37, - "learning_rate": 0.0001409438775510204, - "loss": 1.4089, + "epoch": 5.23, + "learning_rate": 0.00013423295454545456, + "loss": 1.125, "step": 463 }, { - "epoch": 9.39, - "learning_rate": 0.00014081632653061224, - "loss": 1.5026, + "epoch": 5.24, + "learning_rate": 0.0001340909090909091, + "loss": 1.167, "step": 464 }, { - "epoch": 9.41, - "learning_rate": 0.0001406887755102041, - "loss": 1.4857, + "epoch": 5.25, + "learning_rate": 0.00013394886363636365, + "loss": 1.1316, "step": 465 }, { - "epoch": 9.43, - "learning_rate": 0.0001405612244897959, - "loss": 1.3745, + "epoch": 5.26, + "learning_rate": 0.0001338068181818182, + "loss": 1.1604, "step": 466 }, { - "epoch": 9.45, - "learning_rate": 0.00014043367346938776, - "loss": 1.4733, + "epoch": 5.28, + "learning_rate": 0.00013366477272727274, + "loss": 1.2005, "step": 467 }, { - "epoch": 9.47, - "learning_rate": 0.0001403061224489796, - "loss": 1.5212, + "epoch": 5.29, + "learning_rate": 0.00013352272727272727, + "loss": 1.1496, "step": 468 }, { - "epoch": 9.49, - "learning_rate": 0.00014017857142857142, - "loss": 1.5398, + "epoch": 5.3, + "learning_rate": 0.00013338068181818183, + "loss": 1.1331, "step": 469 }, { - "epoch": 9.51, - "learning_rate": 0.00014005102040816328, - "loss": 1.478, + "epoch": 5.31, + "learning_rate": 0.00013323863636363636, + "loss": 1.1414, "step": 470 }, { - "epoch": 9.53, - "learning_rate": 0.0001399234693877551, - "loss": 1.496, + "epoch": 5.32, + "learning_rate": 0.00013309659090909092, + "loss": 1.0945, "step": 471 }, { - "epoch": 9.55, - "learning_rate": 0.00013979591836734694, - "loss": 1.4837, + "epoch": 5.33, + "learning_rate": 0.00013295454545454548, + "loss": 1.1305, "step": 472 }, { - "epoch": 9.57, - "learning_rate": 0.00013966836734693878, - "loss": 1.4724, + "epoch": 5.34, + "learning_rate": 0.0001328125, + "loss": 1.1293, "step": 473 }, { - "epoch": 9.59, - "learning_rate": 0.00013954081632653063, - "loss": 1.4828, + "epoch": 5.35, + "learning_rate": 0.00013267045454545454, + "loss": 1.163, "step": 474 }, { - "epoch": 9.61, - "learning_rate": 0.00013941326530612246, - "loss": 1.5012, + "epoch": 5.37, + "learning_rate": 0.0001325284090909091, + "loss": 1.1236, "step": 475 }, { - "epoch": 9.63, - "learning_rate": 0.0001392857142857143, - "loss": 1.4879, + "epoch": 5.38, + "learning_rate": 0.00013238636363636366, + "loss": 1.1236, "step": 476 }, { - "epoch": 9.65, - "learning_rate": 0.00013915816326530613, - "loss": 1.4196, + "epoch": 5.39, + "learning_rate": 0.0001322443181818182, + "loss": 1.1228, "step": 477 }, { - "epoch": 9.67, - "learning_rate": 0.00013903061224489798, - "loss": 1.4915, + "epoch": 5.4, + "learning_rate": 0.00013210227272727275, + "loss": 1.0993, "step": 478 }, { - "epoch": 9.69, - "learning_rate": 0.0001389030612244898, - "loss": 1.3878, + "epoch": 5.41, + "learning_rate": 0.00013196022727272728, + "loss": 1.1139, "step": 479 }, { - "epoch": 9.71, - "learning_rate": 0.00013877551020408165, - "loss": 1.466, + "epoch": 5.42, + "learning_rate": 0.0001318181818181818, + "loss": 1.1019, "step": 480 }, { - "epoch": 9.73, - "learning_rate": 0.00013864795918367348, - "loss": 1.4582, + "epoch": 5.43, + "learning_rate": 0.00013167613636363637, + "loss": 1.0935, "step": 481 }, { - "epoch": 9.75, - "learning_rate": 0.0001385204081632653, - "loss": 1.533, + "epoch": 5.45, + "learning_rate": 0.00013153409090909093, + "loss": 1.1067, "step": 482 }, { - "epoch": 9.77, - "learning_rate": 0.00013839285714285714, - "loss": 1.4697, + "epoch": 5.46, + "learning_rate": 0.00013139204545454546, + "loss": 1.0848, "step": 483 }, { - "epoch": 9.79, - "learning_rate": 0.000138265306122449, - "loss": 1.3989, + "epoch": 5.47, + "learning_rate": 0.00013125000000000002, + "loss": 1.1188, "step": 484 }, { - "epoch": 9.81, - "learning_rate": 0.00013813775510204083, - "loss": 1.4361, + "epoch": 5.48, + "learning_rate": 0.00013110795454545455, + "loss": 1.1275, "step": 485 }, { - "epoch": 9.83, - "learning_rate": 0.00013801020408163266, - "loss": 1.5271, + "epoch": 5.49, + "learning_rate": 0.00013096590909090908, + "loss": 1.1211, "step": 486 }, { - "epoch": 9.85, - "learning_rate": 0.0001378826530612245, - "loss": 1.4905, + "epoch": 5.5, + "learning_rate": 0.00013082386363636364, + "loss": 1.1049, "step": 487 }, { - "epoch": 9.87, - "learning_rate": 0.00013775510204081635, - "loss": 1.4757, + "epoch": 5.51, + "learning_rate": 0.0001306818181818182, + "loss": 1.1057, "step": 488 }, { - "epoch": 9.89, - "learning_rate": 0.00013762755102040815, - "loss": 1.5485, + "epoch": 5.52, + "learning_rate": 0.00013053977272727273, + "loss": 1.0909, "step": 489 }, { - "epoch": 9.91, - "learning_rate": 0.0001375, - "loss": 1.4783, + "epoch": 5.54, + "learning_rate": 0.0001303977272727273, + "loss": 1.1138, "step": 490 }, { - "epoch": 9.93, - "learning_rate": 0.00013737244897959184, - "loss": 1.4849, + "epoch": 5.55, + "learning_rate": 0.00013025568181818182, + "loss": 1.1094, "step": 491 }, { - "epoch": 9.96, - "learning_rate": 0.00013724489795918367, - "loss": 1.5382, + "epoch": 5.56, + "learning_rate": 0.00013011363636363635, + "loss": 1.1187, "step": 492 }, { - "epoch": 9.98, - "learning_rate": 0.00013711734693877553, - "loss": 1.4902, + "epoch": 5.57, + "learning_rate": 0.0001299715909090909, + "loss": 1.1039, "step": 493 }, { - "epoch": 10.0, - "learning_rate": 0.00013698979591836736, - "loss": 1.4865, + "epoch": 5.58, + "learning_rate": 0.00012982954545454547, + "loss": 1.056, "step": 494 }, { - "epoch": 10.02, - "learning_rate": 0.0001368622448979592, - "loss": 1.4436, + "epoch": 5.59, + "learning_rate": 0.0001296875, + "loss": 1.0842, "step": 495 }, { - "epoch": 10.04, - "learning_rate": 0.00013673469387755102, - "loss": 1.408, + "epoch": 5.6, + "learning_rate": 0.00012954545454545456, + "loss": 1.0749, "step": 496 }, { - "epoch": 10.06, - "learning_rate": 0.00013660714285714288, - "loss": 1.4764, + "epoch": 5.61, + "learning_rate": 0.0001294034090909091, + "loss": 1.1121, "step": 497 }, { - "epoch": 10.08, - "learning_rate": 0.0001364795918367347, - "loss": 1.4646, + "epoch": 5.63, + "learning_rate": 0.00012926136363636365, + "loss": 1.0772, "step": 498 }, { - "epoch": 10.1, - "learning_rate": 0.00013635204081632654, - "loss": 1.406, + "epoch": 5.64, + "learning_rate": 0.00012911931818181818, + "loss": 1.0845, "step": 499 }, { - "epoch": 10.12, - "learning_rate": 0.00013622448979591837, - "loss": 1.4785, + "epoch": 5.65, + "learning_rate": 0.00012897727272727274, + "loss": 1.0534, "step": 500 }, { - "epoch": 10.14, - "learning_rate": 0.00013609693877551023, - "loss": 1.4117, + "epoch": 5.66, + "learning_rate": 0.00012883522727272727, + "loss": 1.0755, "step": 501 }, { - "epoch": 10.16, - "learning_rate": 0.00013596938775510203, - "loss": 1.4108, + "epoch": 5.67, + "learning_rate": 0.00012869318181818183, + "loss": 1.0755, "step": 502 }, { - "epoch": 10.18, - "learning_rate": 0.0001358418367346939, - "loss": 1.4155, + "epoch": 5.68, + "learning_rate": 0.00012855113636363636, + "loss": 1.0869, "step": 503 }, { - "epoch": 10.2, - "learning_rate": 0.00013571428571428572, - "loss": 1.4021, + "epoch": 5.69, + "learning_rate": 0.00012840909090909092, + "loss": 1.0673, "step": 504 }, { - "epoch": 10.22, - "learning_rate": 0.00013558673469387755, - "loss": 1.411, + "epoch": 5.71, + "learning_rate": 0.00012826704545454545, + "loss": 1.0692, "step": 505 }, { - "epoch": 10.24, - "learning_rate": 0.00013545918367346938, - "loss": 1.3851, + "epoch": 5.72, + "learning_rate": 0.000128125, + "loss": 1.0474, "step": 506 }, { - "epoch": 10.26, - "learning_rate": 0.00013533163265306124, - "loss": 1.387, + "epoch": 5.73, + "learning_rate": 0.00012798295454545454, + "loss": 1.0749, "step": 507 }, { - "epoch": 10.28, - "learning_rate": 0.00013520408163265305, - "loss": 1.4163, + "epoch": 5.74, + "learning_rate": 0.0001278409090909091, + "loss": 1.0519, "step": 508 }, { - "epoch": 10.3, - "learning_rate": 0.0001350765306122449, - "loss": 1.3343, + "epoch": 5.75, + "learning_rate": 0.00012769886363636366, + "loss": 1.0566, "step": 509 }, { - "epoch": 10.32, - "learning_rate": 0.00013494897959183673, - "loss": 1.4811, + "epoch": 5.76, + "learning_rate": 0.0001275568181818182, + "loss": 1.06, "step": 510 }, { - "epoch": 10.34, - "learning_rate": 0.0001348214285714286, - "loss": 1.4086, + "epoch": 5.77, + "learning_rate": 0.00012741477272727272, + "loss": 1.0618, "step": 511 }, { - "epoch": 10.36, - "learning_rate": 0.0001346938775510204, - "loss": 1.3879, + "epoch": 5.78, + "learning_rate": 0.00012727272727272728, + "loss": 1.0643, "step": 512 }, { - "epoch": 10.38, - "learning_rate": 0.00013456632653061225, - "loss": 1.4204, + "epoch": 5.8, + "learning_rate": 0.0001271306818181818, + "loss": 1.026, "step": 513 }, { - "epoch": 10.4, - "learning_rate": 0.00013443877551020408, - "loss": 1.4158, + "epoch": 5.81, + "learning_rate": 0.00012698863636363637, + "loss": 1.0335, "step": 514 }, { - "epoch": 10.42, - "learning_rate": 0.00013431122448979592, - "loss": 1.4521, + "epoch": 5.82, + "learning_rate": 0.00012684659090909093, + "loss": 1.0205, "step": 515 }, { - "epoch": 10.44, - "learning_rate": 0.00013418367346938777, - "loss": 1.4196, + "epoch": 5.83, + "learning_rate": 0.00012670454545454546, + "loss": 1.0594, "step": 516 }, { - "epoch": 10.46, - "learning_rate": 0.0001340561224489796, - "loss": 1.4361, + "epoch": 5.84, + "learning_rate": 0.0001265625, + "loss": 1.0136, "step": 517 }, { - "epoch": 10.48, - "learning_rate": 0.00013392857142857144, - "loss": 1.4482, + "epoch": 5.85, + "learning_rate": 0.00012642045454545455, + "loss": 1.0244, "step": 518 }, { - "epoch": 10.5, - "learning_rate": 0.00013380102040816327, - "loss": 1.4801, + "epoch": 5.86, + "learning_rate": 0.00012627840909090908, + "loss": 1.0569, "step": 519 }, { - "epoch": 10.52, - "learning_rate": 0.00013367346938775512, - "loss": 1.4556, + "epoch": 5.87, + "learning_rate": 0.00012613636363636364, + "loss": 1.0416, "step": 520 }, { - "epoch": 10.54, - "learning_rate": 0.00013354591836734695, - "loss": 1.3902, + "epoch": 5.89, + "learning_rate": 0.0001259943181818182, + "loss": 0.9884, "step": 521 }, { - "epoch": 10.56, - "learning_rate": 0.00013341836734693879, - "loss": 1.4269, + "epoch": 5.9, + "learning_rate": 0.00012585227272727273, + "loss": 1.0351, "step": 522 }, { - "epoch": 10.58, - "learning_rate": 0.00013329081632653062, - "loss": 1.4899, + "epoch": 5.91, + "learning_rate": 0.00012571022727272726, + "loss": 1.0037, "step": 523 }, { - "epoch": 10.6, - "learning_rate": 0.00013316326530612247, - "loss": 1.3952, + "epoch": 5.92, + "learning_rate": 0.00012556818181818182, + "loss": 1.0219, "step": 524 }, { - "epoch": 10.62, - "learning_rate": 0.00013303571428571428, - "loss": 1.4116, + "epoch": 5.93, + "learning_rate": 0.00012542613636363635, + "loss": 1.0533, "step": 525 }, { - "epoch": 10.64, - "learning_rate": 0.00013290816326530614, - "loss": 1.4583, + "epoch": 5.94, + "learning_rate": 0.0001252840909090909, + "loss": 1.0031, "step": 526 }, { - "epoch": 10.66, - "learning_rate": 0.00013278061224489797, - "loss": 1.4466, + "epoch": 5.95, + "learning_rate": 0.00012514204545454547, + "loss": 1.0454, "step": 527 }, { - "epoch": 10.68, - "learning_rate": 0.0001326530612244898, - "loss": 1.4242, + "epoch": 5.97, + "learning_rate": 0.000125, + "loss": 1.0195, "step": 528 }, { - "epoch": 10.7, - "learning_rate": 0.00013252551020408163, - "loss": 1.3717, + "epoch": 5.98, + "learning_rate": 0.00012485795454545453, + "loss": 1.0076, "step": 529 }, { - "epoch": 10.72, - "learning_rate": 0.0001323979591836735, - "loss": 1.4583, + "epoch": 5.99, + "learning_rate": 0.0001247159090909091, + "loss": 1.0378, "step": 530 }, { - "epoch": 10.74, - "learning_rate": 0.0001322704081632653, - "loss": 1.4185, + "epoch": 6.0, + "learning_rate": 0.00012457386363636365, + "loss": 0.9795, "step": 531 }, { - "epoch": 10.76, - "learning_rate": 0.00013214285714285715, - "loss": 1.4287, + "epoch": 6.01, + "learning_rate": 0.00012443181818181818, + "loss": 0.9405, "step": 532 }, { - "epoch": 10.78, - "learning_rate": 0.00013201530612244898, - "loss": 1.4385, + "epoch": 6.02, + "learning_rate": 0.00012428977272727274, + "loss": 0.9503, "step": 533 }, { - "epoch": 10.8, - "learning_rate": 0.00013188775510204084, - "loss": 1.453, + "epoch": 6.03, + "learning_rate": 0.00012414772727272727, + "loss": 0.9456, "step": 534 }, { - "epoch": 10.83, - "learning_rate": 0.00013176020408163264, - "loss": 1.4161, + "epoch": 6.04, + "learning_rate": 0.0001240056818181818, + "loss": 0.9536, "step": 535 }, { - "epoch": 10.85, - "learning_rate": 0.0001316326530612245, - "loss": 1.457, + "epoch": 6.06, + "learning_rate": 0.00012386363636363636, + "loss": 0.9412, "step": 536 }, { - "epoch": 10.87, - "learning_rate": 0.00013150510204081633, - "loss": 1.4367, + "epoch": 6.07, + "learning_rate": 0.00012372159090909092, + "loss": 0.9315, "step": 537 }, { - "epoch": 10.89, - "learning_rate": 0.00013137755102040816, - "loss": 1.4256, + "epoch": 6.08, + "learning_rate": 0.00012357954545454545, + "loss": 0.9486, "step": 538 }, { - "epoch": 10.91, - "learning_rate": 0.00013125000000000002, - "loss": 1.424, + "epoch": 6.09, + "learning_rate": 0.0001234375, + "loss": 0.9405, "step": 539 }, { - "epoch": 10.93, - "learning_rate": 0.00013112244897959185, - "loss": 1.3923, + "epoch": 6.1, + "learning_rate": 0.00012329545454545454, + "loss": 0.9269, "step": 540 }, { - "epoch": 10.95, - "learning_rate": 0.00013099489795918368, - "loss": 1.4225, + "epoch": 6.11, + "learning_rate": 0.0001231534090909091, + "loss": 0.9378, "step": 541 }, { - "epoch": 10.97, - "learning_rate": 0.0001308673469387755, - "loss": 1.3969, + "epoch": 6.12, + "learning_rate": 0.00012301136363636366, + "loss": 0.9431, "step": 542 }, { - "epoch": 10.99, - "learning_rate": 0.00013073979591836737, - "loss": 1.4446, + "epoch": 6.13, + "learning_rate": 0.0001228693181818182, + "loss": 0.9256, "step": 543 }, { - "epoch": 11.01, - "learning_rate": 0.00013061224489795917, - "loss": 1.4375, + "epoch": 6.15, + "learning_rate": 0.00012272727272727272, + "loss": 0.919, "step": 544 }, { - "epoch": 11.03, - "learning_rate": 0.00013048469387755103, - "loss": 1.4064, + "epoch": 6.16, + "learning_rate": 0.00012258522727272728, + "loss": 0.9188, "step": 545 }, { - "epoch": 11.05, - "learning_rate": 0.00013035714285714286, - "loss": 1.3454, + "epoch": 6.17, + "learning_rate": 0.00012244318181818181, + "loss": 0.9447, "step": 546 }, { - "epoch": 11.07, - "learning_rate": 0.00013022959183673472, - "loss": 1.3234, + "epoch": 6.18, + "learning_rate": 0.00012230113636363637, + "loss": 0.9261, "step": 547 }, { - "epoch": 11.09, - "learning_rate": 0.00013010204081632652, - "loss": 1.3759, + "epoch": 6.19, + "learning_rate": 0.00012215909090909093, + "loss": 0.9302, "step": 548 }, { - "epoch": 11.11, - "learning_rate": 0.00012997448979591838, - "loss": 1.4221, + "epoch": 6.2, + "learning_rate": 0.00012201704545454546, + "loss": 0.9161, "step": 549 }, { - "epoch": 11.13, - "learning_rate": 0.0001298469387755102, - "loss": 1.4261, + "epoch": 6.21, + "learning_rate": 0.00012187500000000001, + "loss": 0.9521, "step": 550 }, { - "epoch": 11.15, - "learning_rate": 0.00012971938775510204, - "loss": 1.3341, + "epoch": 6.22, + "learning_rate": 0.00012173295454545455, + "loss": 0.9026, "step": 551 }, { - "epoch": 11.17, - "learning_rate": 0.00012959183673469387, - "loss": 1.3994, + "epoch": 6.24, + "learning_rate": 0.00012159090909090908, + "loss": 0.9361, "step": 552 }, { - "epoch": 11.19, - "learning_rate": 0.00012946428571428573, - "loss": 1.3894, + "epoch": 6.25, + "learning_rate": 0.00012144886363636366, + "loss": 0.8944, "step": 553 }, { - "epoch": 11.21, - "learning_rate": 0.00012933673469387754, - "loss": 1.3585, + "epoch": 6.26, + "learning_rate": 0.00012130681818181819, + "loss": 0.895, "step": 554 }, { - "epoch": 11.23, - "learning_rate": 0.0001292091836734694, - "loss": 1.3763, + "epoch": 6.27, + "learning_rate": 0.00012116477272727273, + "loss": 0.8956, "step": 555 }, { - "epoch": 11.25, - "learning_rate": 0.00012908163265306123, - "loss": 1.3623, + "epoch": 6.28, + "learning_rate": 0.00012102272727272728, + "loss": 0.8998, "step": 556 }, { - "epoch": 11.27, - "learning_rate": 0.00012895408163265306, - "loss": 1.3907, + "epoch": 6.29, + "learning_rate": 0.00012088068181818182, + "loss": 0.915, "step": 557 }, { - "epoch": 11.29, - "learning_rate": 0.0001288265306122449, - "loss": 1.3807, + "epoch": 6.3, + "learning_rate": 0.00012073863636363636, + "loss": 0.9282, "step": 558 }, { - "epoch": 11.31, - "learning_rate": 0.00012869897959183674, - "loss": 1.4045, + "epoch": 6.32, + "learning_rate": 0.00012059659090909093, + "loss": 0.8938, "step": 559 }, { - "epoch": 11.33, - "learning_rate": 0.00012857142857142858, - "loss": 1.4038, + "epoch": 6.33, + "learning_rate": 0.00012045454545454546, + "loss": 0.8886, "step": 560 }, { - "epoch": 11.35, - "learning_rate": 0.0001284438775510204, - "loss": 1.3466, + "epoch": 6.34, + "learning_rate": 0.0001203125, + "loss": 0.8988, "step": 561 }, { - "epoch": 11.37, - "learning_rate": 0.00012831632653061226, - "loss": 1.3449, + "epoch": 6.35, + "learning_rate": 0.00012017045454545455, + "loss": 0.8852, "step": 562 }, { - "epoch": 11.39, - "learning_rate": 0.0001281887755102041, - "loss": 1.3866, + "epoch": 6.36, + "learning_rate": 0.0001200284090909091, + "loss": 0.8818, "step": 563 }, { - "epoch": 11.41, - "learning_rate": 0.00012806122448979593, - "loss": 1.3106, + "epoch": 6.37, + "learning_rate": 0.00011988636363636365, + "loss": 0.8881, "step": 564 }, { - "epoch": 11.43, - "learning_rate": 0.00012793367346938776, - "loss": 1.4414, + "epoch": 6.38, + "learning_rate": 0.0001197443181818182, + "loss": 0.9226, "step": 565 }, { - "epoch": 11.45, - "learning_rate": 0.00012780612244897962, - "loss": 1.3737, + "epoch": 6.39, + "learning_rate": 0.00011960227272727273, + "loss": 0.8849, "step": 566 }, { - "epoch": 11.47, - "learning_rate": 0.00012767857142857142, - "loss": 1.4053, + "epoch": 6.41, + "learning_rate": 0.00011946022727272727, + "loss": 0.8894, "step": 567 }, { - "epoch": 11.49, - "learning_rate": 0.00012755102040816328, - "loss": 1.4561, + "epoch": 6.42, + "learning_rate": 0.00011931818181818182, + "loss": 0.9207, "step": 568 }, { - "epoch": 11.51, - "learning_rate": 0.0001274234693877551, - "loss": 1.3684, + "epoch": 6.43, + "learning_rate": 0.00011917613636363636, + "loss": 0.9105, "step": 569 }, { - "epoch": 11.53, - "learning_rate": 0.00012729591836734697, - "loss": 1.3117, + "epoch": 6.44, + "learning_rate": 0.00011903409090909092, + "loss": 0.8762, "step": 570 }, { - "epoch": 11.55, - "learning_rate": 0.00012716836734693877, - "loss": 1.3474, + "epoch": 6.45, + "learning_rate": 0.00011889204545454547, + "loss": 0.8926, "step": 571 }, { - "epoch": 11.57, - "learning_rate": 0.00012704081632653063, - "loss": 1.3804, + "epoch": 6.46, + "learning_rate": 0.00011875, + "loss": 0.8719, "step": 572 }, { - "epoch": 11.59, - "learning_rate": 0.00012691326530612246, - "loss": 1.3656, + "epoch": 6.47, + "learning_rate": 0.00011860795454545454, + "loss": 0.9198, "step": 573 }, { - "epoch": 11.61, - "learning_rate": 0.0001267857142857143, - "loss": 1.3133, + "epoch": 6.48, + "learning_rate": 0.00011846590909090909, + "loss": 0.8846, "step": 574 }, { - "epoch": 11.63, - "learning_rate": 0.00012665816326530612, - "loss": 1.4077, + "epoch": 6.5, + "learning_rate": 0.00011832386363636365, + "loss": 0.8495, "step": 575 }, { - "epoch": 11.65, - "learning_rate": 0.00012653061224489798, - "loss": 1.4087, + "epoch": 6.51, + "learning_rate": 0.0001181818181818182, + "loss": 0.8953, "step": 576 }, { - "epoch": 11.67, - "learning_rate": 0.00012640306122448978, - "loss": 1.3524, + "epoch": 6.52, + "learning_rate": 0.00011803977272727274, + "loss": 0.8686, "step": 577 }, { - "epoch": 11.7, - "learning_rate": 0.00012627551020408164, - "loss": 1.3481, + "epoch": 6.53, + "learning_rate": 0.00011789772727272727, + "loss": 0.8841, "step": 578 }, { - "epoch": 11.72, - "learning_rate": 0.00012614795918367347, - "loss": 1.4497, + "epoch": 6.54, + "learning_rate": 0.00011775568181818182, + "loss": 0.8681, "step": 579 }, { - "epoch": 11.74, - "learning_rate": 0.0001260204081632653, - "loss": 1.3866, + "epoch": 6.55, + "learning_rate": 0.00011761363636363636, + "loss": 0.8732, "step": 580 }, { - "epoch": 11.76, - "learning_rate": 0.00012589285714285713, - "loss": 1.42, + "epoch": 6.56, + "learning_rate": 0.00011747159090909092, + "loss": 0.8582, "step": 581 }, { - "epoch": 11.78, - "learning_rate": 0.000125765306122449, - "loss": 1.3562, + "epoch": 6.58, + "learning_rate": 0.00011732954545454546, + "loss": 0.8744, "step": 582 }, { - "epoch": 11.8, - "learning_rate": 0.00012563775510204082, - "loss": 1.3249, + "epoch": 6.59, + "learning_rate": 0.00011718750000000001, + "loss": 0.8694, "step": 583 }, { - "epoch": 11.82, - "learning_rate": 0.00012551020408163265, - "loss": 1.4277, + "epoch": 6.6, + "learning_rate": 0.00011704545454545454, + "loss": 0.8565, "step": 584 }, { - "epoch": 11.84, - "learning_rate": 0.0001253826530612245, - "loss": 1.3734, + "epoch": 6.61, + "learning_rate": 0.00011690340909090909, + "loss": 0.8584, "step": 585 }, { - "epoch": 11.86, - "learning_rate": 0.00012525510204081634, - "loss": 1.3765, + "epoch": 6.62, + "learning_rate": 0.00011676136363636366, + "loss": 0.8859, "step": 586 }, { - "epoch": 11.88, - "learning_rate": 0.00012512755102040817, - "loss": 1.4153, + "epoch": 6.63, + "learning_rate": 0.00011661931818181819, + "loss": 0.8452, "step": 587 }, { - "epoch": 11.9, - "learning_rate": 0.000125, - "loss": 1.3847, + "epoch": 6.64, + "learning_rate": 0.00011647727272727273, + "loss": 0.8323, "step": 588 }, { - "epoch": 11.92, - "learning_rate": 0.00012487244897959186, - "loss": 1.3824, + "epoch": 6.65, + "learning_rate": 0.00011633522727272728, + "loss": 0.8548, "step": 589 }, { - "epoch": 11.94, - "learning_rate": 0.00012474489795918366, - "loss": 1.3938, + "epoch": 6.67, + "learning_rate": 0.00011619318181818181, + "loss": 0.8506, "step": 590 }, { - "epoch": 11.96, - "learning_rate": 0.00012461734693877552, - "loss": 1.4143, + "epoch": 6.68, + "learning_rate": 0.00011605113636363636, + "loss": 0.8556, "step": 591 }, { - "epoch": 11.98, - "learning_rate": 0.00012448979591836735, - "loss": 1.3794, + "epoch": 6.69, + "learning_rate": 0.00011590909090909093, + "loss": 0.8459, "step": 592 }, { - "epoch": 12.0, - "learning_rate": 0.00012436224489795918, - "loss": 1.3755, + "epoch": 6.7, + "learning_rate": 0.00011576704545454546, + "loss": 0.8432, "step": 593 }, { - "epoch": 12.02, - "learning_rate": 0.00012423469387755101, - "loss": 1.3736, + "epoch": 6.71, + "learning_rate": 0.000115625, + "loss": 0.8645, "step": 594 }, { - "epoch": 12.04, - "learning_rate": 0.00012410714285714287, - "loss": 1.2957, + "epoch": 6.72, + "learning_rate": 0.00011548295454545455, + "loss": 0.86, "step": 595 }, { - "epoch": 12.06, - "learning_rate": 0.0001239795918367347, - "loss": 1.2996, + "epoch": 6.73, + "learning_rate": 0.00011534090909090908, + "loss": 0.8161, "step": 596 }, { - "epoch": 12.08, - "learning_rate": 0.00012385204081632653, - "loss": 1.3648, + "epoch": 6.74, + "learning_rate": 0.00011519886363636365, + "loss": 0.8133, "step": 597 }, { - "epoch": 12.1, - "learning_rate": 0.00012372448979591837, - "loss": 1.3031, + "epoch": 6.76, + "learning_rate": 0.0001150568181818182, + "loss": 0.8372, "step": 598 }, { - "epoch": 12.12, - "learning_rate": 0.00012359693877551022, - "loss": 1.2933, + "epoch": 6.77, + "learning_rate": 0.00011491477272727273, + "loss": 0.8222, "step": 599 }, { - "epoch": 12.14, - "learning_rate": 0.00012346938775510203, - "loss": 1.322, + "epoch": 6.78, + "learning_rate": 0.00011477272727272728, + "loss": 0.8372, "step": 600 }, { - "epoch": 12.16, - "learning_rate": 0.00012334183673469389, - "loss": 1.3123, + "epoch": 6.79, + "learning_rate": 0.00011463068181818182, + "loss": 0.837, "step": 601 }, { - "epoch": 12.18, - "learning_rate": 0.00012321428571428572, - "loss": 1.3187, + "epoch": 6.8, + "learning_rate": 0.00011448863636363637, + "loss": 0.8406, "step": 602 }, { - "epoch": 12.2, - "learning_rate": 0.00012308673469387755, - "loss": 1.3353, + "epoch": 6.81, + "learning_rate": 0.00011434659090909092, + "loss": 0.836, "step": 603 }, { - "epoch": 12.22, - "learning_rate": 0.0001229591836734694, - "loss": 1.3221, + "epoch": 6.82, + "learning_rate": 0.00011420454545454547, + "loss": 0.8476, "step": 604 }, { - "epoch": 12.24, - "learning_rate": 0.00012283163265306124, - "loss": 1.3458, + "epoch": 6.83, + "learning_rate": 0.0001140625, + "loss": 0.8368, "step": 605 }, { - "epoch": 12.26, - "learning_rate": 0.00012270408163265307, - "loss": 1.275, + "epoch": 6.85, + "learning_rate": 0.00011392045454545455, + "loss": 0.822, "step": 606 }, { - "epoch": 12.28, - "learning_rate": 0.0001225765306122449, - "loss": 1.3455, + "epoch": 6.86, + "learning_rate": 0.00011377840909090909, + "loss": 0.8107, "step": 607 }, { - "epoch": 12.3, - "learning_rate": 0.00012244897959183676, - "loss": 1.2769, + "epoch": 6.87, + "learning_rate": 0.00011363636363636365, + "loss": 0.8395, "step": 608 }, { - "epoch": 12.32, - "learning_rate": 0.00012232142857142859, - "loss": 1.3201, + "epoch": 6.88, + "learning_rate": 0.0001134943181818182, + "loss": 0.8083, "step": 609 }, { - "epoch": 12.34, - "learning_rate": 0.00012219387755102042, - "loss": 1.3073, + "epoch": 6.89, + "learning_rate": 0.00011335227272727274, + "loss": 0.828, "step": 610 }, { - "epoch": 12.36, - "learning_rate": 0.00012206632653061225, - "loss": 1.3103, + "epoch": 6.9, + "learning_rate": 0.00011321022727272727, + "loss": 0.8494, "step": 611 }, { - "epoch": 12.38, - "learning_rate": 0.00012193877551020409, - "loss": 1.4437, + "epoch": 6.91, + "learning_rate": 0.00011306818181818182, + "loss": 0.8169, "step": 612 }, { - "epoch": 12.4, - "learning_rate": 0.00012181122448979591, - "loss": 1.3086, + "epoch": 6.93, + "learning_rate": 0.00011292613636363636, + "loss": 0.8224, "step": 613 }, { - "epoch": 12.42, - "learning_rate": 0.00012168367346938775, - "loss": 1.3867, + "epoch": 6.94, + "learning_rate": 0.00011278409090909092, + "loss": 0.8173, "step": 614 }, { - "epoch": 12.44, - "learning_rate": 0.0001215561224489796, - "loss": 1.2565, + "epoch": 6.95, + "learning_rate": 0.00011264204545454547, + "loss": 0.7961, "step": 615 }, { - "epoch": 12.46, - "learning_rate": 0.00012142857142857143, - "loss": 1.335, + "epoch": 6.96, + "learning_rate": 0.00011250000000000001, + "loss": 0.7948, "step": 616 }, { - "epoch": 12.48, - "learning_rate": 0.00012130102040816327, - "loss": 1.3423, + "epoch": 6.97, + "learning_rate": 0.00011235795454545454, + "loss": 0.7746, "step": 617 }, { - "epoch": 12.5, - "learning_rate": 0.00012117346938775512, - "loss": 1.3433, + "epoch": 6.98, + "learning_rate": 0.00011221590909090909, + "loss": 0.8325, "step": 618 }, { - "epoch": 12.52, - "learning_rate": 0.00012104591836734695, - "loss": 1.3387, + "epoch": 6.99, + "learning_rate": 0.00011207386363636365, + "loss": 0.8149, "step": 619 }, { - "epoch": 12.55, - "learning_rate": 0.00012091836734693878, - "loss": 1.3923, + "epoch": 7.0, + "learning_rate": 0.00011193181818181819, + "loss": 0.7516, "step": 620 }, { - "epoch": 12.57, - "learning_rate": 0.00012079081632653062, - "loss": 1.3774, + "epoch": 7.02, + "learning_rate": 0.00011178977272727274, + "loss": 0.7571, "step": 621 }, { - "epoch": 12.59, - "learning_rate": 0.00012066326530612247, - "loss": 1.3203, + "epoch": 7.03, + "learning_rate": 0.00011164772727272728, + "loss": 0.7397, "step": 622 }, { - "epoch": 12.61, - "learning_rate": 0.00012053571428571429, - "loss": 1.2924, + "epoch": 7.04, + "learning_rate": 0.00011150568181818181, + "loss": 0.761, "step": 623 }, { - "epoch": 12.63, - "learning_rate": 0.00012040816326530613, - "loss": 1.3292, + "epoch": 7.05, + "learning_rate": 0.00011136363636363636, + "loss": 0.7783, "step": 624 }, { - "epoch": 12.65, - "learning_rate": 0.00012028061224489798, - "loss": 1.3161, + "epoch": 7.06, + "learning_rate": 0.00011122159090909092, + "loss": 0.7571, "step": 625 }, { - "epoch": 12.67, - "learning_rate": 0.00012015306122448979, - "loss": 1.352, + "epoch": 7.07, + "learning_rate": 0.00011107954545454546, + "loss": 0.7628, "step": 626 }, { - "epoch": 12.69, - "learning_rate": 0.00012002551020408164, - "loss": 1.3577, + "epoch": 7.08, + "learning_rate": 0.0001109375, + "loss": 0.7561, "step": 627 }, { - "epoch": 12.71, - "learning_rate": 0.00011989795918367348, - "loss": 1.3575, + "epoch": 7.09, + "learning_rate": 0.00011079545454545455, + "loss": 0.7432, "step": 628 }, { - "epoch": 12.73, - "learning_rate": 0.0001197704081632653, - "loss": 1.3727, + "epoch": 7.11, + "learning_rate": 0.00011065340909090908, + "loss": 0.7245, "step": 629 }, { - "epoch": 12.75, - "learning_rate": 0.00011964285714285714, - "loss": 1.3312, + "epoch": 7.12, + "learning_rate": 0.00011051136363636366, + "loss": 0.7279, "step": 630 }, { - "epoch": 12.77, - "learning_rate": 0.00011951530612244899, - "loss": 1.3378, + "epoch": 7.13, + "learning_rate": 0.00011036931818181819, + "loss": 0.7347, "step": 631 }, { - "epoch": 12.79, - "learning_rate": 0.00011938775510204083, - "loss": 1.295, + "epoch": 7.14, + "learning_rate": 0.00011022727272727273, + "loss": 0.7427, "step": 632 }, { - "epoch": 12.81, - "learning_rate": 0.00011926020408163265, - "loss": 1.3447, + "epoch": 7.15, + "learning_rate": 0.00011008522727272728, + "loss": 0.7339, "step": 633 }, { - "epoch": 12.83, - "learning_rate": 0.0001191326530612245, - "loss": 1.3835, + "epoch": 7.16, + "learning_rate": 0.00010994318181818182, + "loss": 0.7375, "step": 634 }, { - "epoch": 12.85, - "learning_rate": 0.00011900510204081634, - "loss": 1.3222, + "epoch": 7.17, + "learning_rate": 0.00010980113636363635, + "loss": 0.7182, "step": 635 }, { - "epoch": 12.87, - "learning_rate": 0.00011887755102040817, - "loss": 1.2851, + "epoch": 7.19, + "learning_rate": 0.00010965909090909093, + "loss": 0.7452, "step": 636 }, { - "epoch": 12.89, - "learning_rate": 0.00011875, - "loss": 1.2723, + "epoch": 7.2, + "learning_rate": 0.00010951704545454546, + "loss": 0.7565, "step": 637 }, { - "epoch": 12.91, - "learning_rate": 0.00011862244897959184, - "loss": 1.3924, + "epoch": 7.21, + "learning_rate": 0.000109375, + "loss": 0.7296, "step": 638 }, { - "epoch": 12.93, - "learning_rate": 0.00011849489795918368, - "loss": 1.4625, + "epoch": 7.22, + "learning_rate": 0.00010923295454545455, + "loss": 0.7484, "step": 639 }, { - "epoch": 12.95, - "learning_rate": 0.00011836734693877552, - "loss": 1.3245, + "epoch": 7.23, + "learning_rate": 0.00010909090909090909, + "loss": 0.732, "step": 640 }, { - "epoch": 12.97, - "learning_rate": 0.00011823979591836736, - "loss": 1.4042, + "epoch": 7.24, + "learning_rate": 0.00010894886363636365, + "loss": 0.7415, "step": 641 }, { - "epoch": 12.99, - "learning_rate": 0.00011811224489795918, - "loss": 1.3761, + "epoch": 7.25, + "learning_rate": 0.0001088068181818182, + "loss": 0.7344, "step": 642 }, { - "epoch": 13.01, - "learning_rate": 0.00011798469387755103, - "loss": 1.3376, + "epoch": 7.26, + "learning_rate": 0.00010866477272727274, + "loss": 0.7267, "step": 643 }, { - "epoch": 13.03, - "learning_rate": 0.00011785714285714287, - "loss": 1.2174, + "epoch": 7.28, + "learning_rate": 0.00010852272727272727, + "loss": 0.7543, "step": 644 }, { - "epoch": 13.05, - "learning_rate": 0.00011772959183673471, - "loss": 1.3602, + "epoch": 7.29, + "learning_rate": 0.00010838068181818182, + "loss": 0.7266, "step": 645 }, { - "epoch": 13.07, - "learning_rate": 0.00011760204081632653, - "loss": 1.3002, + "epoch": 7.3, + "learning_rate": 0.00010823863636363636, + "loss": 0.7449, "step": 646 }, { - "epoch": 13.09, - "learning_rate": 0.00011747448979591838, - "loss": 1.2262, + "epoch": 7.31, + "learning_rate": 0.00010809659090909092, + "loss": 0.7324, "step": 647 }, { - "epoch": 13.11, - "learning_rate": 0.00011734693877551022, - "loss": 1.3048, + "epoch": 7.32, + "learning_rate": 0.00010795454545454547, + "loss": 0.7268, "step": 648 }, { - "epoch": 13.13, - "learning_rate": 0.00011721938775510204, - "loss": 1.2231, + "epoch": 7.33, + "learning_rate": 0.00010781250000000001, + "loss": 0.7172, "step": 649 }, { - "epoch": 13.15, - "learning_rate": 0.00011709183673469388, - "loss": 1.2996, + "epoch": 7.34, + "learning_rate": 0.00010767045454545454, + "loss": 0.7169, "step": 650 }, { - "epoch": 13.17, - "learning_rate": 0.00011696428571428573, - "loss": 1.2708, + "epoch": 7.35, + "learning_rate": 0.00010752840909090909, + "loss": 0.7194, "step": 651 }, { - "epoch": 13.19, - "learning_rate": 0.00011683673469387754, - "loss": 1.2776, + "epoch": 7.37, + "learning_rate": 0.00010738636363636365, + "loss": 0.7223, "step": 652 }, { - "epoch": 13.21, - "learning_rate": 0.00011670918367346939, - "loss": 1.248, + "epoch": 7.38, + "learning_rate": 0.00010724431818181819, + "loss": 0.7158, "step": 653 }, { - "epoch": 13.23, - "learning_rate": 0.00011658163265306123, - "loss": 1.2582, + "epoch": 7.39, + "learning_rate": 0.00010710227272727274, + "loss": 0.7122, "step": 654 }, { - "epoch": 13.25, - "learning_rate": 0.00011645408163265305, - "loss": 1.3011, + "epoch": 7.4, + "learning_rate": 0.00010696022727272728, + "loss": 0.7225, "step": 655 }, { - "epoch": 13.27, - "learning_rate": 0.0001163265306122449, - "loss": 1.2969, + "epoch": 7.41, + "learning_rate": 0.00010681818181818181, + "loss": 0.7102, "step": 656 }, { - "epoch": 13.29, - "learning_rate": 0.00011619897959183674, - "loss": 1.2454, + "epoch": 7.42, + "learning_rate": 0.00010667613636363636, + "loss": 0.7251, "step": 657 }, { - "epoch": 13.31, - "learning_rate": 0.00011607142857142858, - "loss": 1.1914, + "epoch": 7.43, + "learning_rate": 0.00010653409090909092, + "loss": 0.7191, "step": 658 }, { - "epoch": 13.33, - "learning_rate": 0.00011594387755102041, - "loss": 1.34, + "epoch": 7.45, + "learning_rate": 0.00010639204545454546, + "loss": 0.7015, "step": 659 }, { - "epoch": 13.35, - "learning_rate": 0.00011581632653061225, - "loss": 1.2828, + "epoch": 7.46, + "learning_rate": 0.00010625000000000001, + "loss": 0.693, "step": 660 }, { - "epoch": 13.37, - "learning_rate": 0.00011568877551020409, - "loss": 1.2962, + "epoch": 7.47, + "learning_rate": 0.00010610795454545455, + "loss": 0.7039, "step": 661 }, { - "epoch": 13.39, - "learning_rate": 0.00011556122448979592, - "loss": 1.3334, + "epoch": 7.48, + "learning_rate": 0.00010596590909090908, + "loss": 0.7305, "step": 662 }, { - "epoch": 13.42, - "learning_rate": 0.00011543367346938776, - "loss": 1.2832, + "epoch": 7.49, + "learning_rate": 0.00010582386363636366, + "loss": 0.6978, "step": 663 }, { - "epoch": 13.44, - "learning_rate": 0.00011530612244897961, - "loss": 1.3012, + "epoch": 7.5, + "learning_rate": 0.00010568181818181819, + "loss": 0.7219, "step": 664 }, { - "epoch": 13.46, - "learning_rate": 0.00011517857142857143, - "loss": 1.2857, + "epoch": 7.51, + "learning_rate": 0.00010553977272727273, + "loss": 0.7199, "step": 665 }, { - "epoch": 13.48, - "learning_rate": 0.00011505102040816327, - "loss": 1.2855, + "epoch": 7.52, + "learning_rate": 0.00010539772727272728, + "loss": 0.6979, "step": 666 }, { - "epoch": 13.5, - "learning_rate": 0.00011492346938775512, - "loss": 1.3077, + "epoch": 7.54, + "learning_rate": 0.00010525568181818182, + "loss": 0.7058, "step": 667 }, { - "epoch": 13.52, - "learning_rate": 0.00011479591836734696, - "loss": 1.3139, + "epoch": 7.55, + "learning_rate": 0.00010511363636363635, + "loss": 0.6994, "step": 668 }, { - "epoch": 13.54, - "learning_rate": 0.00011466836734693878, - "loss": 1.3138, + "epoch": 7.56, + "learning_rate": 0.00010497159090909093, + "loss": 0.7141, "step": 669 }, { - "epoch": 13.56, - "learning_rate": 0.00011454081632653062, - "loss": 1.2808, + "epoch": 7.57, + "learning_rate": 0.00010482954545454546, + "loss": 0.7092, "step": 670 }, { - "epoch": 13.58, - "learning_rate": 0.00011441326530612247, - "loss": 1.2492, + "epoch": 7.58, + "learning_rate": 0.0001046875, + "loss": 0.7059, "step": 671 }, { - "epoch": 13.6, - "learning_rate": 0.00011428571428571428, - "loss": 1.2027, + "epoch": 7.59, + "learning_rate": 0.00010454545454545455, + "loss": 0.6904, "step": 672 }, { - "epoch": 13.62, - "learning_rate": 0.00011415816326530613, - "loss": 1.33, + "epoch": 7.6, + "learning_rate": 0.0001044034090909091, + "loss": 0.7115, "step": 673 }, { - "epoch": 13.64, - "learning_rate": 0.00011403061224489797, - "loss": 1.3112, + "epoch": 7.61, + "learning_rate": 0.00010426136363636365, + "loss": 0.7254, "step": 674 }, { - "epoch": 13.66, - "learning_rate": 0.00011390306122448979, - "loss": 1.2772, + "epoch": 7.63, + "learning_rate": 0.0001041193181818182, + "loss": 0.7181, "step": 675 }, { - "epoch": 13.68, - "learning_rate": 0.00011377551020408163, - "loss": 1.2701, + "epoch": 7.64, + "learning_rate": 0.00010397727272727273, + "loss": 0.6867, "step": 676 }, { - "epoch": 13.7, - "learning_rate": 0.00011364795918367348, - "loss": 1.1973, + "epoch": 7.65, + "learning_rate": 0.00010383522727272727, + "loss": 0.6917, "step": 677 }, { - "epoch": 13.72, - "learning_rate": 0.0001135204081632653, - "loss": 1.3124, + "epoch": 7.66, + "learning_rate": 0.00010369318181818182, + "loss": 0.6908, "step": 678 }, { - "epoch": 13.74, - "learning_rate": 0.00011339285714285714, - "loss": 1.3085, + "epoch": 7.67, + "learning_rate": 0.00010355113636363636, + "loss": 0.6871, "step": 679 }, { - "epoch": 13.76, - "learning_rate": 0.00011326530612244898, - "loss": 1.3457, + "epoch": 7.68, + "learning_rate": 0.00010340909090909092, + "loss": 0.682, "step": 680 }, { - "epoch": 13.78, - "learning_rate": 0.00011313775510204083, - "loss": 1.3338, + "epoch": 7.69, + "learning_rate": 0.00010326704545454547, + "loss": 0.6737, "step": 681 }, { - "epoch": 13.8, - "learning_rate": 0.00011301020408163266, - "loss": 1.2753, + "epoch": 7.7, + "learning_rate": 0.000103125, + "loss": 0.7023, "step": 682 }, { - "epoch": 13.82, - "learning_rate": 0.00011288265306122449, - "loss": 1.2786, + "epoch": 7.72, + "learning_rate": 0.00010298295454545454, + "loss": 0.7079, "step": 683 }, { - "epoch": 13.84, - "learning_rate": 0.00011275510204081634, - "loss": 1.2584, + "epoch": 7.73, + "learning_rate": 0.00010284090909090909, + "loss": 0.6954, "step": 684 }, { - "epoch": 13.86, - "learning_rate": 0.00011262755102040817, - "loss": 1.2779, + "epoch": 7.74, + "learning_rate": 0.00010269886363636365, + "loss": 0.6834, "step": 685 }, { - "epoch": 13.88, - "learning_rate": 0.00011250000000000001, - "loss": 1.3502, + "epoch": 7.75, + "learning_rate": 0.0001025568181818182, + "loss": 0.6706, "step": 686 }, { - "epoch": 13.9, - "learning_rate": 0.00011237244897959185, - "loss": 1.3251, + "epoch": 7.76, + "learning_rate": 0.00010241477272727274, + "loss": 0.6706, "step": 687 }, { - "epoch": 13.92, - "learning_rate": 0.00011224489795918367, - "loss": 1.273, + "epoch": 7.77, + "learning_rate": 0.00010227272727272727, + "loss": 0.681, "step": 688 }, { - "epoch": 13.94, - "learning_rate": 0.00011211734693877552, - "loss": 1.3341, + "epoch": 7.78, + "learning_rate": 0.00010213068181818182, + "loss": 0.6853, "step": 689 }, { - "epoch": 13.96, - "learning_rate": 0.00011198979591836736, - "loss": 1.2654, + "epoch": 7.8, + "learning_rate": 0.00010198863636363636, + "loss": 0.6772, "step": 690 }, { - "epoch": 13.98, - "learning_rate": 0.00011186224489795918, - "loss": 1.3333, + "epoch": 7.81, + "learning_rate": 0.00010184659090909092, + "loss": 0.6635, "step": 691 }, { - "epoch": 14.0, - "learning_rate": 0.00011173469387755102, - "loss": 1.3246, + "epoch": 7.82, + "learning_rate": 0.00010170454545454546, + "loss": 0.6712, "step": 692 }, { - "epoch": 14.02, - "learning_rate": 0.00011160714285714287, - "loss": 1.2547, + "epoch": 7.83, + "learning_rate": 0.00010156250000000001, + "loss": 0.6884, "step": 693 }, { - "epoch": 14.04, - "learning_rate": 0.00011147959183673471, - "loss": 1.208, + "epoch": 7.84, + "learning_rate": 0.00010142045454545454, + "loss": 0.6641, "step": 694 }, { - "epoch": 14.06, - "learning_rate": 0.00011135204081632653, - "loss": 1.223, + "epoch": 7.85, + "learning_rate": 0.00010127840909090909, + "loss": 0.6838, "step": 695 }, { - "epoch": 14.08, - "learning_rate": 0.00011122448979591837, - "loss": 1.2483, + "epoch": 7.86, + "learning_rate": 0.00010113636363636366, + "loss": 0.675, "step": 696 }, { - "epoch": 14.1, - "learning_rate": 0.00011109693877551022, - "loss": 1.2823, + "epoch": 7.87, + "learning_rate": 0.00010099431818181819, + "loss": 0.6626, "step": 697 }, { - "epoch": 14.12, - "learning_rate": 0.00011096938775510204, - "loss": 1.2013, + "epoch": 7.89, + "learning_rate": 0.00010085227272727273, + "loss": 0.6605, "step": 698 }, { - "epoch": 14.14, - "learning_rate": 0.00011084183673469388, - "loss": 1.1883, + "epoch": 7.9, + "learning_rate": 0.00010071022727272728, + "loss": 0.6777, "step": 699 }, { - "epoch": 14.16, - "learning_rate": 0.00011071428571428572, - "loss": 1.2364, + "epoch": 7.91, + "learning_rate": 0.00010056818181818181, + "loss": 0.6347, "step": 700 }, { - "epoch": 14.18, - "learning_rate": 0.00011058673469387754, - "loss": 1.2069, + "epoch": 7.92, + "learning_rate": 0.00010042613636363636, + "loss": 0.6857, "step": 701 }, { - "epoch": 14.2, - "learning_rate": 0.00011045918367346939, - "loss": 1.1968, + "epoch": 7.93, + "learning_rate": 0.00010028409090909093, + "loss": 0.6677, "step": 702 }, { - "epoch": 14.22, - "learning_rate": 0.00011033163265306123, - "loss": 1.2236, + "epoch": 7.94, + "learning_rate": 0.00010014204545454546, + "loss": 0.6697, "step": 703 }, { - "epoch": 14.24, - "learning_rate": 0.00011020408163265306, - "loss": 1.1942, + "epoch": 7.95, + "learning_rate": 0.0001, + "loss": 0.6375, "step": 704 }, { - "epoch": 14.26, - "learning_rate": 0.0001100765306122449, - "loss": 1.2561, + "epoch": 7.96, + "learning_rate": 9.985795454545455e-05, + "loss": 0.6572, "step": 705 }, { - "epoch": 14.29, - "learning_rate": 0.00010994897959183674, - "loss": 1.1839, + "epoch": 7.98, + "learning_rate": 9.97159090909091e-05, + "loss": 0.668, "step": 706 }, { - "epoch": 14.31, - "learning_rate": 0.00010982142857142858, - "loss": 1.2128, + "epoch": 7.99, + "learning_rate": 9.957386363636364e-05, + "loss": 0.6797, "step": 707 }, { - "epoch": 14.33, - "learning_rate": 0.00010969387755102041, - "loss": 1.3086, + "epoch": 8.0, + "learning_rate": 9.943181818181819e-05, + "loss": 0.6784, "step": 708 }, { - "epoch": 14.35, - "learning_rate": 0.00010956632653061226, - "loss": 1.2379, + "epoch": 8.01, + "learning_rate": 9.928977272727273e-05, + "loss": 0.6192, "step": 709 }, { - "epoch": 14.37, - "learning_rate": 0.0001094387755102041, - "loss": 1.176, + "epoch": 8.02, + "learning_rate": 9.914772727272728e-05, + "loss": 0.6287, "step": 710 }, { - "epoch": 14.39, - "learning_rate": 0.00010931122448979592, - "loss": 1.2105, + "epoch": 8.03, + "learning_rate": 9.900568181818183e-05, + "loss": 0.6034, "step": 711 }, { - "epoch": 14.41, - "learning_rate": 0.00010918367346938776, - "loss": 1.2149, + "epoch": 8.04, + "learning_rate": 9.886363636363637e-05, + "loss": 0.6167, "step": 712 }, { - "epoch": 14.43, - "learning_rate": 0.0001090561224489796, - "loss": 1.2392, + "epoch": 8.06, + "learning_rate": 9.872159090909091e-05, + "loss": 0.6353, "step": 713 }, { - "epoch": 14.45, - "learning_rate": 0.00010892857142857142, - "loss": 1.2471, + "epoch": 8.07, + "learning_rate": 9.857954545454547e-05, + "loss": 0.6222, "step": 714 }, { - "epoch": 14.47, - "learning_rate": 0.00010880102040816327, - "loss": 1.2561, + "epoch": 8.08, + "learning_rate": 9.84375e-05, + "loss": 0.5963, "step": 715 }, { - "epoch": 14.49, - "learning_rate": 0.00010867346938775511, - "loss": 1.2179, + "epoch": 8.09, + "learning_rate": 9.829545454545455e-05, + "loss": 0.6042, "step": 716 }, { - "epoch": 14.51, - "learning_rate": 0.00010854591836734696, - "loss": 1.2459, + "epoch": 8.1, + "learning_rate": 9.81534090909091e-05, + "loss": 0.612, "step": 717 }, { - "epoch": 14.53, - "learning_rate": 0.00010841836734693877, - "loss": 1.2933, + "epoch": 8.11, + "learning_rate": 9.801136363636364e-05, + "loss": 0.6069, "step": 718 }, { - "epoch": 14.55, - "learning_rate": 0.00010829081632653062, - "loss": 1.2862, + "epoch": 8.12, + "learning_rate": 9.786931818181818e-05, + "loss": 0.6001, "step": 719 }, { - "epoch": 14.57, - "learning_rate": 0.00010816326530612246, - "loss": 1.2976, + "epoch": 8.13, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6007, "step": 720 }, { - "epoch": 14.59, - "learning_rate": 0.00010803571428571428, - "loss": 1.231, + "epoch": 8.15, + "learning_rate": 9.758522727272727e-05, + "loss": 0.6079, "step": 721 }, { - "epoch": 14.61, - "learning_rate": 0.00010790816326530613, - "loss": 1.2464, + "epoch": 8.16, + "learning_rate": 9.744318181818183e-05, + "loss": 0.6216, "step": 722 }, { - "epoch": 14.63, - "learning_rate": 0.00010778061224489797, - "loss": 1.2181, + "epoch": 8.17, + "learning_rate": 9.730113636363637e-05, + "loss": 0.6321, "step": 723 }, { - "epoch": 14.65, - "learning_rate": 0.00010765306122448979, - "loss": 1.3307, + "epoch": 8.18, + "learning_rate": 9.71590909090909e-05, + "loss": 0.6044, "step": 724 }, { - "epoch": 14.67, - "learning_rate": 0.00010752551020408163, - "loss": 1.1723, + "epoch": 8.19, + "learning_rate": 9.701704545454547e-05, + "loss": 0.6028, "step": 725 }, { - "epoch": 14.69, - "learning_rate": 0.00010739795918367348, - "loss": 1.1528, + "epoch": 8.2, + "learning_rate": 9.687500000000001e-05, + "loss": 0.6098, "step": 726 }, { - "epoch": 14.71, - "learning_rate": 0.0001072704081632653, - "loss": 1.215, + "epoch": 8.21, + "learning_rate": 9.673295454545454e-05, + "loss": 0.6032, "step": 727 }, { - "epoch": 14.73, - "learning_rate": 0.00010714285714285715, - "loss": 1.2624, + "epoch": 8.22, + "learning_rate": 9.65909090909091e-05, + "loss": 0.6298, "step": 728 }, { - "epoch": 14.75, - "learning_rate": 0.00010701530612244898, - "loss": 1.3117, + "epoch": 8.24, + "learning_rate": 9.644886363636365e-05, + "loss": 0.6115, "step": 729 }, { - "epoch": 14.77, - "learning_rate": 0.00010688775510204083, - "loss": 1.2572, + "epoch": 8.25, + "learning_rate": 9.630681818181818e-05, + "loss": 0.6052, "step": 730 }, { - "epoch": 14.79, - "learning_rate": 0.00010676020408163266, - "loss": 1.222, + "epoch": 8.26, + "learning_rate": 9.616477272727274e-05, + "loss": 0.6097, "step": 731 }, { - "epoch": 14.81, - "learning_rate": 0.0001066326530612245, - "loss": 1.2881, + "epoch": 8.27, + "learning_rate": 9.602272727272728e-05, + "loss": 0.6062, "step": 732 }, { - "epoch": 14.83, - "learning_rate": 0.00010650510204081635, - "loss": 1.2676, + "epoch": 8.28, + "learning_rate": 9.588068181818183e-05, + "loss": 0.5984, "step": 733 }, { - "epoch": 14.85, - "learning_rate": 0.00010637755102040816, - "loss": 1.2734, + "epoch": 8.29, + "learning_rate": 9.573863636363637e-05, + "loss": 0.6432, "step": 734 }, { - "epoch": 14.87, - "learning_rate": 0.00010625000000000001, - "loss": 1.2885, + "epoch": 8.3, + "learning_rate": 9.559659090909092e-05, + "loss": 0.5814, "step": 735 }, { - "epoch": 14.89, - "learning_rate": 0.00010612244897959185, - "loss": 1.2764, + "epoch": 8.31, + "learning_rate": 9.545454545454546e-05, + "loss": 0.5965, "step": 736 }, { - "epoch": 14.91, - "learning_rate": 0.00010599489795918367, - "loss": 1.3267, + "epoch": 8.33, + "learning_rate": 9.53125e-05, + "loss": 0.6102, "step": 737 }, { - "epoch": 14.93, - "learning_rate": 0.00010586734693877551, - "loss": 1.2445, + "epoch": 8.34, + "learning_rate": 9.517045454545455e-05, + "loss": 0.5849, "step": 738 }, { - "epoch": 14.95, - "learning_rate": 0.00010573979591836736, - "loss": 1.3359, + "epoch": 8.35, + "learning_rate": 9.50284090909091e-05, + "loss": 0.6062, "step": 739 }, { - "epoch": 14.97, - "learning_rate": 0.00010561224489795918, - "loss": 1.2508, + "epoch": 8.36, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6031, "step": 740 }, { - "epoch": 14.99, - "learning_rate": 0.00010548469387755102, - "loss": 1.2227, + "epoch": 8.37, + "learning_rate": 9.474431818181819e-05, + "loss": 0.5932, "step": 741 }, { - "epoch": 15.01, - "learning_rate": 0.00010535714285714286, - "loss": 1.1889, + "epoch": 8.38, + "learning_rate": 9.460227272727273e-05, + "loss": 0.589, "step": 742 }, { - "epoch": 15.03, - "learning_rate": 0.00010522959183673471, - "loss": 1.1919, + "epoch": 8.39, + "learning_rate": 9.446022727272728e-05, + "loss": 0.6096, "step": 743 }, { - "epoch": 15.05, - "learning_rate": 0.00010510204081632653, - "loss": 1.2383, + "epoch": 8.41, + "learning_rate": 9.431818181818182e-05, + "loss": 0.601, "step": 744 }, { - "epoch": 15.07, - "learning_rate": 0.00010497448979591837, - "loss": 1.2401, + "epoch": 8.42, + "learning_rate": 9.417613636363637e-05, + "loss": 0.5798, "step": 745 }, { - "epoch": 15.09, - "learning_rate": 0.00010484693877551021, - "loss": 1.2015, + "epoch": 8.43, + "learning_rate": 9.403409090909091e-05, + "loss": 0.59, "step": 746 }, { - "epoch": 15.11, - "learning_rate": 0.00010471938775510203, - "loss": 1.1509, + "epoch": 8.44, + "learning_rate": 9.389204545454546e-05, + "loss": 0.5988, "step": 747 }, { - "epoch": 15.13, - "learning_rate": 0.00010459183673469388, - "loss": 1.1878, + "epoch": 8.45, + "learning_rate": 9.375e-05, + "loss": 0.5591, "step": 748 }, { - "epoch": 15.16, - "learning_rate": 0.00010446428571428572, - "loss": 1.1706, + "epoch": 8.46, + "learning_rate": 9.360795454545455e-05, + "loss": 0.5939, "step": 749 }, { - "epoch": 15.18, - "learning_rate": 0.00010433673469387755, - "loss": 1.1285, + "epoch": 8.47, + "learning_rate": 9.346590909090909e-05, + "loss": 0.5886, "step": 750 }, { - "epoch": 15.2, - "learning_rate": 0.0001042091836734694, - "loss": 1.1608, + "epoch": 8.48, + "learning_rate": 9.332386363636364e-05, + "loss": 0.5994, "step": 751 }, { - "epoch": 15.22, - "learning_rate": 0.00010408163265306123, - "loss": 1.1178, + "epoch": 8.5, + "learning_rate": 9.318181818181818e-05, + "loss": 0.5821, "step": 752 }, { - "epoch": 15.24, - "learning_rate": 0.00010395408163265306, - "loss": 1.1293, + "epoch": 8.51, + "learning_rate": 9.303977272727273e-05, + "loss": 0.602, "step": 753 }, { - "epoch": 15.26, - "learning_rate": 0.0001038265306122449, - "loss": 1.2306, + "epoch": 8.52, + "learning_rate": 9.289772727272727e-05, + "loss": 0.5708, "step": 754 }, { - "epoch": 15.28, - "learning_rate": 0.00010369897959183675, - "loss": 1.1541, + "epoch": 8.53, + "learning_rate": 9.275568181818183e-05, + "loss": 0.5902, "step": 755 }, { - "epoch": 15.3, - "learning_rate": 0.00010357142857142859, - "loss": 1.1702, + "epoch": 8.54, + "learning_rate": 9.261363636363636e-05, + "loss": 0.6053, "step": 756 }, { - "epoch": 15.32, - "learning_rate": 0.00010344387755102041, - "loss": 1.2119, + "epoch": 8.55, + "learning_rate": 9.247159090909091e-05, + "loss": 0.5797, "step": 757 }, { - "epoch": 15.34, - "learning_rate": 0.00010331632653061225, - "loss": 1.2239, + "epoch": 8.56, + "learning_rate": 9.232954545454547e-05, + "loss": 0.5965, "step": 758 }, { - "epoch": 15.36, - "learning_rate": 0.0001031887755102041, - "loss": 1.2019, + "epoch": 8.57, + "learning_rate": 9.21875e-05, + "loss": 0.5738, "step": 759 }, { - "epoch": 15.38, - "learning_rate": 0.00010306122448979591, - "loss": 1.2197, + "epoch": 8.59, + "learning_rate": 9.204545454545454e-05, + "loss": 0.5819, "step": 760 }, { - "epoch": 15.4, - "learning_rate": 0.00010293367346938776, - "loss": 1.1769, + "epoch": 8.6, + "learning_rate": 9.19034090909091e-05, + "loss": 0.5994, "step": 761 }, { - "epoch": 15.42, - "learning_rate": 0.0001028061224489796, - "loss": 1.1907, + "epoch": 8.61, + "learning_rate": 9.176136363636363e-05, + "loss": 0.5738, "step": 762 }, { - "epoch": 15.44, - "learning_rate": 0.00010267857142857142, - "loss": 1.2089, + "epoch": 8.62, + "learning_rate": 9.161931818181818e-05, + "loss": 0.5663, "step": 763 }, { - "epoch": 15.46, - "learning_rate": 0.00010255102040816327, - "loss": 1.1335, + "epoch": 8.63, + "learning_rate": 9.147727272727274e-05, + "loss": 0.5798, "step": 764 }, { - "epoch": 15.48, - "learning_rate": 0.00010242346938775511, - "loss": 1.1633, + "epoch": 8.64, + "learning_rate": 9.133522727272727e-05, + "loss": 0.5705, "step": 765 }, { - "epoch": 15.5, - "learning_rate": 0.00010229591836734695, - "loss": 1.1578, + "epoch": 8.65, + "learning_rate": 9.119318181818183e-05, + "loss": 0.5943, "step": 766 }, { - "epoch": 15.52, - "learning_rate": 0.00010216836734693877, - "loss": 1.2236, + "epoch": 8.67, + "learning_rate": 9.105113636363637e-05, + "loss": 0.6019, "step": 767 }, { - "epoch": 15.54, - "learning_rate": 0.00010204081632653062, - "loss": 1.1941, + "epoch": 8.68, + "learning_rate": 9.090909090909092e-05, + "loss": 0.5733, "step": 768 }, { - "epoch": 15.56, - "learning_rate": 0.00010191326530612246, - "loss": 1.2666, + "epoch": 8.69, + "learning_rate": 9.076704545454546e-05, + "loss": 0.575, "step": 769 }, { - "epoch": 15.58, - "learning_rate": 0.00010178571428571428, - "loss": 1.1232, + "epoch": 8.7, + "learning_rate": 9.062500000000001e-05, + "loss": 0.5675, "step": 770 }, { - "epoch": 15.6, - "learning_rate": 0.00010165816326530612, - "loss": 1.2242, + "epoch": 8.71, + "learning_rate": 9.048295454545455e-05, + "loss": 0.566, "step": 771 }, { - "epoch": 15.62, - "learning_rate": 0.00010153061224489797, - "loss": 1.1852, + "epoch": 8.72, + "learning_rate": 9.03409090909091e-05, + "loss": 0.5513, "step": 772 }, { - "epoch": 15.64, - "learning_rate": 0.0001014030612244898, - "loss": 1.2626, + "epoch": 8.73, + "learning_rate": 9.019886363636364e-05, + "loss": 0.5682, "step": 773 }, { - "epoch": 15.66, - "learning_rate": 0.00010127551020408164, - "loss": 1.1873, + "epoch": 8.74, + "learning_rate": 9.005681818181819e-05, + "loss": 0.5508, "step": 774 }, { - "epoch": 15.68, - "learning_rate": 0.00010114795918367349, - "loss": 1.3005, + "epoch": 8.76, + "learning_rate": 8.991477272727273e-05, + "loss": 0.5668, "step": 775 }, { - "epoch": 15.7, - "learning_rate": 0.0001010204081632653, - "loss": 1.1904, + "epoch": 8.77, + "learning_rate": 8.977272727272728e-05, + "loss": 0.569, "step": 776 }, { - "epoch": 15.72, - "learning_rate": 0.00010089285714285715, - "loss": 1.2927, + "epoch": 8.78, + "learning_rate": 8.963068181818182e-05, + "loss": 0.5897, "step": 777 }, { - "epoch": 15.74, - "learning_rate": 0.00010076530612244899, - "loss": 1.179, + "epoch": 8.79, + "learning_rate": 8.948863636363637e-05, + "loss": 0.5738, "step": 778 }, { - "epoch": 15.76, - "learning_rate": 0.00010063775510204084, - "loss": 1.2027, + "epoch": 8.8, + "learning_rate": 8.934659090909091e-05, + "loss": 0.5511, "step": 779 }, { - "epoch": 15.78, - "learning_rate": 0.00010051020408163265, - "loss": 1.2428, + "epoch": 8.81, + "learning_rate": 8.920454545454546e-05, + "loss": 0.5659, "step": 780 }, { - "epoch": 15.8, - "learning_rate": 0.0001003826530612245, - "loss": 1.2324, + "epoch": 8.82, + "learning_rate": 8.90625e-05, + "loss": 0.5649, "step": 781 }, { - "epoch": 15.82, - "learning_rate": 0.00010025510204081634, - "loss": 1.1251, + "epoch": 8.83, + "learning_rate": 8.892045454545455e-05, + "loss": 0.5618, "step": 782 }, { - "epoch": 15.84, - "learning_rate": 0.00010012755102040816, - "loss": 1.2405, + "epoch": 8.85, + "learning_rate": 8.87784090909091e-05, + "loss": 0.5602, "step": 783 }, { - "epoch": 15.86, - "learning_rate": 0.0001, - "loss": 1.2005, + "epoch": 8.86, + "learning_rate": 8.863636363636364e-05, + "loss": 0.5723, "step": 784 }, { - "epoch": 15.88, - "learning_rate": 9.987244897959184e-05, - "loss": 1.2259, + "epoch": 8.87, + "learning_rate": 8.849431818181818e-05, + "loss": 0.5816, "step": 785 }, { - "epoch": 15.9, - "learning_rate": 9.974489795918368e-05, - "loss": 1.1576, + "epoch": 8.88, + "learning_rate": 8.835227272727273e-05, + "loss": 0.555, "step": 786 }, { - "epoch": 15.92, - "learning_rate": 9.961734693877551e-05, - "loss": 1.1834, + "epoch": 8.89, + "learning_rate": 8.821022727272727e-05, + "loss": 0.5563, "step": 787 }, { - "epoch": 15.94, - "learning_rate": 9.948979591836736e-05, - "loss": 1.2396, + "epoch": 8.9, + "learning_rate": 8.806818181818183e-05, + "loss": 0.554, "step": 788 }, { - "epoch": 15.96, - "learning_rate": 9.936224489795919e-05, - "loss": 1.1865, + "epoch": 8.91, + "learning_rate": 8.792613636363636e-05, + "loss": 0.5671, "step": 789 }, { - "epoch": 15.98, - "learning_rate": 9.923469387755102e-05, - "loss": 1.2356, + "epoch": 8.92, + "learning_rate": 8.778409090909091e-05, + "loss": 0.5485, "step": 790 }, { - "epoch": 16.01, - "learning_rate": 9.910714285714286e-05, - "loss": 1.2639, + "epoch": 8.94, + "learning_rate": 8.764204545454547e-05, + "loss": 0.5712, "step": 791 }, { - "epoch": 16.03, - "learning_rate": 9.897959183673469e-05, - "loss": 1.1216, + "epoch": 8.95, + "learning_rate": 8.75e-05, + "loss": 0.5507, "step": 792 }, { - "epoch": 16.05, - "learning_rate": 9.885204081632652e-05, - "loss": 1.1051, + "epoch": 8.96, + "learning_rate": 8.735795454545454e-05, + "loss": 0.5718, "step": 793 }, { - "epoch": 16.07, - "learning_rate": 9.872448979591837e-05, - "loss": 1.0864, + "epoch": 8.97, + "learning_rate": 8.72159090909091e-05, + "loss": 0.5585, "step": 794 }, { - "epoch": 16.09, - "learning_rate": 9.859693877551021e-05, - "loss": 1.182, + "epoch": 8.98, + "learning_rate": 8.707386363636363e-05, + "loss": 0.5563, "step": 795 }, { - "epoch": 16.11, - "learning_rate": 9.846938775510204e-05, - "loss": 1.1272, + "epoch": 8.99, + "learning_rate": 8.693181818181818e-05, + "loss": 0.581, "step": 796 }, { - "epoch": 16.13, - "learning_rate": 9.834183673469389e-05, - "loss": 1.1946, + "epoch": 9.0, + "learning_rate": 8.678977272727274e-05, + "loss": 0.5511, "step": 797 }, { - "epoch": 16.15, - "learning_rate": 9.821428571428572e-05, - "loss": 1.0875, + "epoch": 9.02, + "learning_rate": 8.664772727272727e-05, + "loss": 0.5103, "step": 798 }, { - "epoch": 16.17, - "learning_rate": 9.808673469387756e-05, - "loss": 1.1671, + "epoch": 9.03, + "learning_rate": 8.650568181818183e-05, + "loss": 0.5323, "step": 799 }, { - "epoch": 16.19, - "learning_rate": 9.79591836734694e-05, - "loss": 1.1502, + "epoch": 9.04, + "learning_rate": 8.636363636363637e-05, + "loss": 0.5092, "step": 800 }, { - "epoch": 16.21, - "learning_rate": 9.783163265306124e-05, - "loss": 1.19, + "epoch": 9.05, + "learning_rate": 8.62215909090909e-05, + "loss": 0.5247, "step": 801 }, { - "epoch": 16.23, - "learning_rate": 9.770408163265307e-05, - "loss": 1.1258, + "epoch": 9.06, + "learning_rate": 8.607954545454546e-05, + "loss": 0.5403, "step": 802 }, { - "epoch": 16.25, - "learning_rate": 9.75765306122449e-05, - "loss": 1.1765, + "epoch": 9.07, + "learning_rate": 8.593750000000001e-05, + "loss": 0.5252, "step": 803 }, { - "epoch": 16.27, - "learning_rate": 9.744897959183674e-05, - "loss": 1.1217, + "epoch": 9.08, + "learning_rate": 8.579545454545454e-05, + "loss": 0.5296, "step": 804 }, { - "epoch": 16.29, - "learning_rate": 9.732142857142858e-05, - "loss": 1.1293, + "epoch": 9.09, + "learning_rate": 8.56534090909091e-05, + "loss": 0.5223, "step": 805 }, { - "epoch": 16.31, - "learning_rate": 9.719387755102042e-05, - "loss": 1.17, + "epoch": 9.11, + "learning_rate": 8.551136363636364e-05, + "loss": 0.4972, "step": 806 }, { - "epoch": 16.33, - "learning_rate": 9.706632653061225e-05, - "loss": 1.17, + "epoch": 9.12, + "learning_rate": 8.536931818181818e-05, + "loss": 0.5005, "step": 807 }, { - "epoch": 16.35, - "learning_rate": 9.693877551020408e-05, - "loss": 1.2004, + "epoch": 9.13, + "learning_rate": 8.522727272727273e-05, + "loss": 0.5249, "step": 808 }, { - "epoch": 16.37, - "learning_rate": 9.681122448979593e-05, - "loss": 1.1648, + "epoch": 9.14, + "learning_rate": 8.508522727272728e-05, + "loss": 0.5135, "step": 809 }, { - "epoch": 16.39, - "learning_rate": 9.668367346938776e-05, - "loss": 1.0688, + "epoch": 9.15, + "learning_rate": 8.494318181818182e-05, + "loss": 0.5053, "step": 810 }, { - "epoch": 16.41, - "learning_rate": 9.655612244897959e-05, - "loss": 1.1607, + "epoch": 9.16, + "learning_rate": 8.480113636363637e-05, + "loss": 0.5158, "step": 811 }, { - "epoch": 16.43, - "learning_rate": 9.642857142857143e-05, - "loss": 1.1298, + "epoch": 9.17, + "learning_rate": 8.465909090909091e-05, + "loss": 0.5061, "step": 812 }, { - "epoch": 16.45, - "learning_rate": 9.630102040816326e-05, - "loss": 1.1064, + "epoch": 9.18, + "learning_rate": 8.451704545454546e-05, + "loss": 0.4988, "step": 813 }, { - "epoch": 16.47, - "learning_rate": 9.617346938775511e-05, - "loss": 1.1472, + "epoch": 9.2, + "learning_rate": 8.4375e-05, + "loss": 0.5273, "step": 814 }, { - "epoch": 16.49, - "learning_rate": 9.604591836734694e-05, - "loss": 1.1577, + "epoch": 9.21, + "learning_rate": 8.423295454545455e-05, + "loss": 0.5332, "step": 815 }, { - "epoch": 16.51, - "learning_rate": 9.591836734693878e-05, - "loss": 1.1436, + "epoch": 9.22, + "learning_rate": 8.40909090909091e-05, + "loss": 0.5181, "step": 816 }, { - "epoch": 16.53, - "learning_rate": 9.579081632653061e-05, - "loss": 1.1657, + "epoch": 9.23, + "learning_rate": 8.394886363636364e-05, + "loss": 0.5085, "step": 817 }, { - "epoch": 16.55, - "learning_rate": 9.566326530612246e-05, - "loss": 1.1147, + "epoch": 9.24, + "learning_rate": 8.380681818181818e-05, + "loss": 0.5137, "step": 818 }, { - "epoch": 16.57, - "learning_rate": 9.553571428571429e-05, - "loss": 1.1839, + "epoch": 9.25, + "learning_rate": 8.366477272727273e-05, + "loss": 0.5195, "step": 819 }, { - "epoch": 16.59, - "learning_rate": 9.540816326530613e-05, - "loss": 1.1298, + "epoch": 9.26, + "learning_rate": 8.352272727272727e-05, + "loss": 0.5077, "step": 820 }, { - "epoch": 16.61, - "learning_rate": 9.528061224489796e-05, - "loss": 1.2141, + "epoch": 9.28, + "learning_rate": 8.338068181818183e-05, + "loss": 0.5074, "step": 821 }, { - "epoch": 16.63, - "learning_rate": 9.515306122448981e-05, - "loss": 1.2045, + "epoch": 9.29, + "learning_rate": 8.323863636363637e-05, + "loss": 0.5142, "step": 822 }, { - "epoch": 16.65, - "learning_rate": 9.502551020408164e-05, - "loss": 1.1791, + "epoch": 9.3, + "learning_rate": 8.309659090909091e-05, + "loss": 0.5116, "step": 823 }, { - "epoch": 16.67, - "learning_rate": 9.489795918367348e-05, - "loss": 1.1137, + "epoch": 9.31, + "learning_rate": 8.295454545454547e-05, + "loss": 0.4974, "step": 824 }, { - "epoch": 16.69, - "learning_rate": 9.477040816326531e-05, - "loss": 1.1312, + "epoch": 9.32, + "learning_rate": 8.28125e-05, + "loss": 0.5117, "step": 825 }, { - "epoch": 16.71, - "learning_rate": 9.464285714285715e-05, - "loss": 1.1102, + "epoch": 9.33, + "learning_rate": 8.267045454545455e-05, + "loss": 0.5114, "step": 826 }, { - "epoch": 16.73, - "learning_rate": 9.451530612244899e-05, - "loss": 1.1865, + "epoch": 9.34, + "learning_rate": 8.25284090909091e-05, + "loss": 0.5039, "step": 827 }, { - "epoch": 16.75, - "learning_rate": 9.438775510204082e-05, - "loss": 1.1232, + "epoch": 9.35, + "learning_rate": 8.238636363636364e-05, + "loss": 0.498, "step": 828 }, { - "epoch": 16.77, - "learning_rate": 9.426020408163265e-05, - "loss": 1.2068, + "epoch": 9.37, + "learning_rate": 8.224431818181818e-05, + "loss": 0.5042, "step": 829 }, { - "epoch": 16.79, - "learning_rate": 9.41326530612245e-05, - "loss": 1.1864, + "epoch": 9.38, + "learning_rate": 8.210227272727274e-05, + "loss": 0.5049, "step": 830 }, { - "epoch": 16.81, - "learning_rate": 9.400510204081633e-05, - "loss": 1.2195, + "epoch": 9.39, + "learning_rate": 8.196022727272727e-05, + "loss": 0.5123, "step": 831 }, { - "epoch": 16.83, - "learning_rate": 9.387755102040817e-05, - "loss": 1.2063, + "epoch": 9.4, + "learning_rate": 8.181818181818183e-05, + "loss": 0.4907, "step": 832 }, { - "epoch": 16.85, - "learning_rate": 9.375e-05, - "loss": 1.1455, + "epoch": 9.41, + "learning_rate": 8.167613636363637e-05, + "loss": 0.5267, "step": 833 }, { - "epoch": 16.88, - "learning_rate": 9.362244897959183e-05, - "loss": 1.1819, + "epoch": 9.42, + "learning_rate": 8.15340909090909e-05, + "loss": 0.5314, "step": 834 }, { - "epoch": 16.9, - "learning_rate": 9.349489795918368e-05, - "loss": 1.1887, + "epoch": 9.43, + "learning_rate": 8.139204545454546e-05, + "loss": 0.4952, "step": 835 }, { - "epoch": 16.92, - "learning_rate": 9.336734693877551e-05, - "loss": 1.1557, + "epoch": 9.44, + "learning_rate": 8.125000000000001e-05, + "loss": 0.5014, "step": 836 }, { - "epoch": 16.94, - "learning_rate": 9.323979591836735e-05, - "loss": 1.2094, + "epoch": 9.46, + "learning_rate": 8.110795454545454e-05, + "loss": 0.4967, "step": 837 }, { - "epoch": 16.96, - "learning_rate": 9.311224489795918e-05, - "loss": 1.1512, + "epoch": 9.47, + "learning_rate": 8.09659090909091e-05, + "loss": 0.5116, "step": 838 }, { - "epoch": 16.98, - "learning_rate": 9.298469387755103e-05, - "loss": 1.1463, + "epoch": 9.48, + "learning_rate": 8.082386363636365e-05, + "loss": 0.5119, "step": 839 }, { - "epoch": 17.0, - "learning_rate": 9.285714285714286e-05, - "loss": 1.155, + "epoch": 9.49, + "learning_rate": 8.068181818181818e-05, + "loss": 0.4987, "step": 840 }, { - "epoch": 17.02, - "learning_rate": 9.27295918367347e-05, - "loss": 1.1292, + "epoch": 9.5, + "learning_rate": 8.053977272727274e-05, + "loss": 0.5063, "step": 841 }, { - "epoch": 17.04, - "learning_rate": 9.260204081632653e-05, - "loss": 1.0996, + "epoch": 9.51, + "learning_rate": 8.039772727272728e-05, + "loss": 0.5019, "step": 842 }, { - "epoch": 17.06, - "learning_rate": 9.247448979591838e-05, - "loss": 1.0662, + "epoch": 9.52, + "learning_rate": 8.025568181818183e-05, + "loss": 0.5272, "step": 843 }, { - "epoch": 17.08, - "learning_rate": 9.234693877551021e-05, - "loss": 1.0931, + "epoch": 9.54, + "learning_rate": 8.011363636363637e-05, + "loss": 0.4969, "step": 844 }, { - "epoch": 17.1, - "learning_rate": 9.221938775510205e-05, - "loss": 1.0727, + "epoch": 9.55, + "learning_rate": 7.997159090909092e-05, + "loss": 0.5222, "step": 845 }, { - "epoch": 17.12, - "learning_rate": 9.209183673469388e-05, - "loss": 1.1043, + "epoch": 9.56, + "learning_rate": 7.982954545454546e-05, + "loss": 0.4729, "step": 846 }, { - "epoch": 17.14, - "learning_rate": 9.196428571428572e-05, - "loss": 1.0594, + "epoch": 9.57, + "learning_rate": 7.96875e-05, + "loss": 0.4976, "step": 847 }, { - "epoch": 17.16, - "learning_rate": 9.183673469387756e-05, - "loss": 1.0952, + "epoch": 9.58, + "learning_rate": 7.954545454545455e-05, + "loss": 0.4974, "step": 848 }, { - "epoch": 17.18, - "learning_rate": 9.170918367346939e-05, - "loss": 1.0639, + "epoch": 9.59, + "learning_rate": 7.94034090909091e-05, + "loss": 0.4849, "step": 849 }, { - "epoch": 17.2, - "learning_rate": 9.158163265306124e-05, - "loss": 1.132, + "epoch": 9.6, + "learning_rate": 7.926136363636364e-05, + "loss": 0.4897, "step": 850 }, { - "epoch": 17.22, - "learning_rate": 9.145408163265307e-05, - "loss": 1.1083, + "epoch": 9.61, + "learning_rate": 7.911931818181819e-05, + "loss": 0.4962, "step": 851 }, { - "epoch": 17.24, - "learning_rate": 9.13265306122449e-05, - "loss": 1.1282, + "epoch": 9.63, + "learning_rate": 7.897727272727273e-05, + "loss": 0.4877, "step": 852 }, { - "epoch": 17.26, - "learning_rate": 9.119897959183674e-05, - "loss": 1.0474, + "epoch": 9.64, + "learning_rate": 7.883522727272728e-05, + "loss": 0.4921, "step": 853 }, { - "epoch": 17.28, - "learning_rate": 9.107142857142857e-05, - "loss": 1.1138, + "epoch": 9.65, + "learning_rate": 7.869318181818182e-05, + "loss": 0.4969, "step": 854 }, { - "epoch": 17.3, - "learning_rate": 9.094387755102042e-05, - "loss": 1.1025, + "epoch": 9.66, + "learning_rate": 7.855113636363637e-05, + "loss": 0.5045, "step": 855 }, { - "epoch": 17.32, - "learning_rate": 9.081632653061225e-05, - "loss": 1.0968, + "epoch": 9.67, + "learning_rate": 7.840909090909091e-05, + "loss": 0.5207, "step": 856 }, { - "epoch": 17.34, - "learning_rate": 9.068877551020408e-05, - "loss": 1.1683, + "epoch": 9.68, + "learning_rate": 7.826704545454546e-05, + "loss": 0.5098, "step": 857 }, { - "epoch": 17.36, - "learning_rate": 9.056122448979592e-05, - "loss": 1.0975, + "epoch": 9.69, + "learning_rate": 7.8125e-05, + "loss": 0.5005, "step": 858 }, { - "epoch": 17.38, - "learning_rate": 9.043367346938775e-05, - "loss": 1.1274, + "epoch": 9.7, + "learning_rate": 7.798295454545455e-05, + "loss": 0.5028, "step": 859 }, { - "epoch": 17.4, - "learning_rate": 9.030612244897958e-05, - "loss": 1.0916, + "epoch": 9.72, + "learning_rate": 7.784090909090909e-05, + "loss": 0.5067, "step": 860 }, { - "epoch": 17.42, - "learning_rate": 9.017857142857143e-05, - "loss": 1.0912, + "epoch": 9.73, + "learning_rate": 7.769886363636364e-05, + "loss": 0.484, "step": 861 }, { - "epoch": 17.44, - "learning_rate": 9.005102040816327e-05, - "loss": 1.0875, + "epoch": 9.74, + "learning_rate": 7.755681818181818e-05, + "loss": 0.5029, "step": 862 }, { - "epoch": 17.46, - "learning_rate": 8.99234693877551e-05, - "loss": 1.05, + "epoch": 9.75, + "learning_rate": 7.741477272727273e-05, + "loss": 0.5077, "step": 863 }, { - "epoch": 17.48, - "learning_rate": 8.979591836734695e-05, - "loss": 1.1418, + "epoch": 9.76, + "learning_rate": 7.727272727272727e-05, + "loss": 0.5091, "step": 864 }, { - "epoch": 17.5, - "learning_rate": 8.966836734693878e-05, - "loss": 1.0609, + "epoch": 9.77, + "learning_rate": 7.713068181818183e-05, + "loss": 0.4781, "step": 865 }, { - "epoch": 17.52, - "learning_rate": 8.954081632653062e-05, - "loss": 1.1611, + "epoch": 9.78, + "learning_rate": 7.698863636363636e-05, + "loss": 0.5124, "step": 866 }, { - "epoch": 17.54, - "learning_rate": 8.941326530612245e-05, - "loss": 1.1065, + "epoch": 9.79, + "learning_rate": 7.684659090909091e-05, + "loss": 0.4859, "step": 867 }, { - "epoch": 17.56, - "learning_rate": 8.92857142857143e-05, - "loss": 1.1611, + "epoch": 9.81, + "learning_rate": 7.670454545454547e-05, + "loss": 0.4872, "step": 868 }, { - "epoch": 17.58, - "learning_rate": 8.915816326530613e-05, - "loss": 1.1398, + "epoch": 9.82, + "learning_rate": 7.65625e-05, + "loss": 0.4675, "step": 869 }, { - "epoch": 17.6, - "learning_rate": 8.903061224489796e-05, - "loss": 1.1055, + "epoch": 9.83, + "learning_rate": 7.642045454545454e-05, + "loss": 0.5056, "step": 870 }, { - "epoch": 17.62, - "learning_rate": 8.89030612244898e-05, - "loss": 1.1314, + "epoch": 9.84, + "learning_rate": 7.62784090909091e-05, + "loss": 0.4868, "step": 871 }, { - "epoch": 17.64, - "learning_rate": 8.877551020408164e-05, - "loss": 1.1084, + "epoch": 9.85, + "learning_rate": 7.613636363636363e-05, + "loss": 0.4907, "step": 872 }, { - "epoch": 17.66, - "learning_rate": 8.864795918367348e-05, - "loss": 1.1254, + "epoch": 9.86, + "learning_rate": 7.599431818181818e-05, + "loss": 0.474, "step": 873 }, { - "epoch": 17.68, - "learning_rate": 8.852040816326531e-05, - "loss": 1.142, + "epoch": 9.87, + "learning_rate": 7.585227272727274e-05, + "loss": 0.4813, "step": 874 }, { - "epoch": 17.7, - "learning_rate": 8.839285714285714e-05, - "loss": 1.1371, + "epoch": 9.89, + "learning_rate": 7.571022727272727e-05, + "loss": 0.4838, "step": 875 }, { - "epoch": 17.72, - "learning_rate": 8.826530612244899e-05, - "loss": 1.1092, + "epoch": 9.9, + "learning_rate": 7.556818181818183e-05, + "loss": 0.4935, "step": 876 }, { - "epoch": 17.75, - "learning_rate": 8.813775510204082e-05, - "loss": 1.161, + "epoch": 9.91, + "learning_rate": 7.542613636363637e-05, + "loss": 0.4884, "step": 877 }, { - "epoch": 17.77, - "learning_rate": 8.801020408163265e-05, - "loss": 1.1044, + "epoch": 9.92, + "learning_rate": 7.52840909090909e-05, + "loss": 0.4797, "step": 878 }, { - "epoch": 17.79, - "learning_rate": 8.788265306122449e-05, - "loss": 1.117, + "epoch": 9.93, + "learning_rate": 7.514204545454546e-05, + "loss": 0.479, "step": 879 }, { - "epoch": 17.81, - "learning_rate": 8.775510204081632e-05, - "loss": 1.1262, + "epoch": 9.94, + "learning_rate": 7.500000000000001e-05, + "loss": 0.4727, "step": 880 }, { - "epoch": 17.83, - "learning_rate": 8.762755102040817e-05, - "loss": 1.0829, + "epoch": 9.95, + "learning_rate": 7.485795454545454e-05, + "loss": 0.4758, "step": 881 }, { - "epoch": 17.85, - "learning_rate": 8.75e-05, - "loss": 1.1393, + "epoch": 9.96, + "learning_rate": 7.47159090909091e-05, + "loss": 0.482, "step": 882 }, { - "epoch": 17.87, - "learning_rate": 8.737244897959183e-05, - "loss": 1.1781, + "epoch": 9.98, + "learning_rate": 7.457386363636364e-05, + "loss": 0.4951, "step": 883 }, { - "epoch": 17.89, - "learning_rate": 8.724489795918367e-05, - "loss": 1.1582, + "epoch": 9.99, + "learning_rate": 7.443181818181817e-05, + "loss": 0.4823, "step": 884 }, { - "epoch": 17.91, - "learning_rate": 8.711734693877552e-05, - "loss": 1.1469, + "epoch": 10.0, + "learning_rate": 7.428977272727273e-05, + "loss": 0.4638, "step": 885 }, { - "epoch": 17.93, - "learning_rate": 8.698979591836735e-05, - "loss": 1.1494, + "epoch": 10.01, + "learning_rate": 7.414772727272728e-05, + "loss": 0.4715, "step": 886 }, { - "epoch": 17.95, - "learning_rate": 8.68622448979592e-05, - "loss": 1.1251, + "epoch": 10.02, + "learning_rate": 7.400568181818182e-05, + "loss": 0.461, "step": 887 }, { - "epoch": 17.97, - "learning_rate": 8.673469387755102e-05, - "loss": 1.1624, + "epoch": 10.03, + "learning_rate": 7.386363636363637e-05, + "loss": 0.4429, "step": 888 }, { - "epoch": 17.99, - "learning_rate": 8.660714285714287e-05, - "loss": 1.0842, + "epoch": 10.04, + "learning_rate": 7.372159090909091e-05, + "loss": 0.4403, "step": 889 }, { - "epoch": 18.01, - "learning_rate": 8.64795918367347e-05, - "loss": 1.1944, + "epoch": 10.05, + "learning_rate": 7.357954545454546e-05, + "loss": 0.4519, "step": 890 }, { - "epoch": 18.03, - "learning_rate": 8.635204081632653e-05, - "loss": 1.0642, + "epoch": 10.07, + "learning_rate": 7.34375e-05, + "loss": 0.4611, "step": 891 }, { - "epoch": 18.05, - "learning_rate": 8.622448979591838e-05, - "loss": 1.0459, + "epoch": 10.08, + "learning_rate": 7.329545454545455e-05, + "loss": 0.4543, "step": 892 }, { - "epoch": 18.07, - "learning_rate": 8.60969387755102e-05, - "loss": 1.0941, + "epoch": 10.09, + "learning_rate": 7.315340909090909e-05, + "loss": 0.4528, "step": 893 }, { - "epoch": 18.09, - "learning_rate": 8.596938775510205e-05, - "loss": 1.0457, + "epoch": 10.1, + "learning_rate": 7.301136363636364e-05, + "loss": 0.4586, "step": 894 }, { - "epoch": 18.11, - "learning_rate": 8.584183673469388e-05, - "loss": 1.1033, + "epoch": 10.11, + "learning_rate": 7.286931818181818e-05, + "loss": 0.4418, "step": 895 }, { - "epoch": 18.13, - "learning_rate": 8.571428571428571e-05, - "loss": 1.0756, + "epoch": 10.12, + "learning_rate": 7.272727272727273e-05, + "loss": 0.4435, "step": 896 }, { - "epoch": 18.15, - "learning_rate": 8.558673469387756e-05, - "loss": 1.0615, + "epoch": 10.13, + "learning_rate": 7.258522727272727e-05, + "loss": 0.44, "step": 897 }, { - "epoch": 18.17, - "learning_rate": 8.545918367346939e-05, - "loss": 1.0828, + "epoch": 10.15, + "learning_rate": 7.244318181818183e-05, + "loss": 0.4589, "step": 898 }, { - "epoch": 18.19, - "learning_rate": 8.533163265306123e-05, - "loss": 1.1158, + "epoch": 10.16, + "learning_rate": 7.230113636363636e-05, + "loss": 0.4597, "step": 899 }, { - "epoch": 18.21, - "learning_rate": 8.520408163265306e-05, - "loss": 1.0133, + "epoch": 10.17, + "learning_rate": 7.215909090909091e-05, + "loss": 0.4479, "step": 900 } ], "logging_steps": 1, - "max_steps": 1568, - "num_train_epochs": 32, + "max_steps": 1408, + "num_train_epochs": 16, "save_steps": 100, - "total_flos": 1.3323843726343987e+18, + "total_flos": 1.227986631604562e+18, "trial_name": null, "trial_params": null } diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin index db23e07d097c18532e52f58a70eb72d22e39c8c1..ee7ddb867f05d9a969f71467a8eb88994865cf51 100644 --- a/checkpoint-900/training_args.bin +++ b/checkpoint-900/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b610cbc4242bb50b4985b00e205994ae514fec6d9e2273f2b545a583a07b154b +oid sha256:dc6a4742808b4bf3d45f92b24bdf7431a361a91d28d7901c45cf6a7781b8ab12 size 4155 diff --git a/runs/Sep07_21-11-43_ThanhHa/events.out.tfevents.1694095915.ThanhHa.394731.0 b/runs/Sep07_21-11-43_ThanhHa/events.out.tfevents.1694095915.ThanhHa.394731.0 new file mode 100644 index 0000000000000000000000000000000000000000..ce803b244499d000f6f744c31af2b7cb7f2cc5c1 --- /dev/null +++ b/runs/Sep07_21-11-43_ThanhHa/events.out.tfevents.1694095915.ThanhHa.394731.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8dad27dbe334a581ee472196ecf556186455fd7c690be38598bc5204a7e336e +size 5116 diff --git a/runs/Sep07_22-33-24_ThanhHa/events.out.tfevents.1694100814.ThanhHa.395502.0 b/runs/Sep07_22-33-24_ThanhHa/events.out.tfevents.1694100814.ThanhHa.395502.0 new file mode 100644 index 0000000000000000000000000000000000000000..2748866b8ad3129f0bc47e506541af1618637c3c --- /dev/null +++ b/runs/Sep07_22-33-24_ThanhHa/events.out.tfevents.1694100814.ThanhHa.395502.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87710e6aaeb17305070659ecc60eaf6bfd088ebccd3ca76cf841c30caf34b61 +size 225837