diff --git a/README.md b/README.md
index 3b0f0e952850a23210ed3e451c43bf1fff2a0a7a..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,218 @@
---
-license: agpl-3.0
+library_name: peft
+base_model: mistralai/Mixtral-8x7B-v0.1
---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+## Training procedure
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+### Framework versions
+
+- PEFT 0.7.0
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,32 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "k_proj",
+ "w1",
+ "gate",
+ "w2",
+ "q_proj",
+ "w3",
+ "o_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..39e3ef00a3d1f840c0bad9ae799106645d481de6
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25a8dc900c45c16fe4060c56a74bcce93fd948f888817926ff3324f571a9d29c
+size 3875879784
diff --git a/checkpoint-2499/README.md b/checkpoint-2499/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda
--- /dev/null
+++ b/checkpoint-2499/README.md
@@ -0,0 +1,218 @@
+---
+library_name: peft
+base_model: mistralai/Mixtral-8x7B-v0.1
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+## Training procedure
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+### Framework versions
+
+- PEFT 0.7.0
\ No newline at end of file
diff --git a/checkpoint-2499/README.md:com.dropbox.attrs b/checkpoint-2499/README.md:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..1808e0b593d04494a557f6700a0a684b4dea91a4
Binary files /dev/null and b/checkpoint-2499/README.md:com.dropbox.attrs differ
diff --git a/checkpoint-2499/adapter_config.json b/checkpoint-2499/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a
--- /dev/null
+++ b/checkpoint-2499/adapter_config.json
@@ -0,0 +1,32 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "k_proj",
+ "w1",
+ "gate",
+ "w2",
+ "q_proj",
+ "w3",
+ "o_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-2499/adapter_config.json:com.dropbox.attrs b/checkpoint-2499/adapter_config.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..00aecb26753e4aea65baffa2d0881f0c47b1bd2c
Binary files /dev/null and b/checkpoint-2499/adapter_config.json:com.dropbox.attrs differ
diff --git a/checkpoint-2499/adapter_model.safetensors b/checkpoint-2499/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9223c1ecabcbcb75ec367af6667223c798711e15
--- /dev/null
+++ b/checkpoint-2499/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d145f0793c75b00a347852ac3d1ff74246478adc3d2fbd80da5134d7c45216b3
+size 3875879784
diff --git a/checkpoint-2499/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-2499/adapter_model.safetensors:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..f897259a0e557e63001059025c0d0379605e90fb
Binary files /dev/null and b/checkpoint-2499/adapter_model.safetensors:com.dropbox.attrs differ
diff --git a/checkpoint-2499/optimizer.pt b/checkpoint-2499/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d32e15c7914c85a2a8be4364ccb3a2c4c4a4a871
--- /dev/null
+++ b/checkpoint-2499/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b19ff3b3f8e900f1a64f6fca67dbbce1331401ef929eae3800b067cfc39c4f7
+size 1943844127
diff --git a/checkpoint-2499/optimizer.pt:com.dropbox.attrs b/checkpoint-2499/optimizer.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..f383fb65bb72bdde60e257709707d96722ad58dc
Binary files /dev/null and b/checkpoint-2499/optimizer.pt:com.dropbox.attrs differ
diff --git a/checkpoint-2499/rng_state.pth b/checkpoint-2499/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ebb27732da7a38b431129ebf31376f218c83f61e
--- /dev/null
+++ b/checkpoint-2499/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3674ba216d6f7f8897c3829f726223c3519635c58acbcadefc26dde709002fdd
+size 14575
diff --git a/checkpoint-2499/rng_state.pth:com.dropbox.attrs b/checkpoint-2499/rng_state.pth:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..1cc85d53bf48bf264e31895a66cbd48007d774c4
Binary files /dev/null and b/checkpoint-2499/rng_state.pth:com.dropbox.attrs differ
diff --git a/checkpoint-2499/scheduler.pt b/checkpoint-2499/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f1a3892702919ebf655e78e56235f9a67f56091
--- /dev/null
+++ b/checkpoint-2499/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf09374ab62b3b49f9a578d2d4aee7daeb894d5ef4fb18b178a6d45b26190dc1
+size 627
diff --git a/checkpoint-2499/scheduler.pt:com.dropbox.attrs b/checkpoint-2499/scheduler.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..ae9c8204d36673eb197b758ca794f99bc8c4e03f
Binary files /dev/null and b/checkpoint-2499/scheduler.pt:com.dropbox.attrs differ
diff --git a/checkpoint-2499/trainer_state.json b/checkpoint-2499/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..decb845f8699fad9a5dc10c0342ab19c70c2d2a6
--- /dev/null
+++ b/checkpoint-2499/trainer_state.json
@@ -0,0 +1,15111 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.972388955582233,
+ "eval_steps": 209,
+ "global_step": 2499,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": 2e-05,
+ "loss": 2.1426,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "eval_loss": 2.071432113647461,
+ "eval_runtime": 279.6718,
+ "eval_samples_per_second": 0.737,
+ "eval_steps_per_second": 0.737,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4e-05,
+ "loss": 2.4033,
+ "step": 2
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 6e-05,
+ "loss": 2.1893,
+ "step": 3
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 8e-05,
+ "loss": 2.3226,
+ "step": 4
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0001,
+ "loss": 2.2485,
+ "step": 5
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00012,
+ "loss": 1.9704,
+ "step": 6
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00014,
+ "loss": 1.6929,
+ "step": 7
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00016,
+ "loss": 2.2957,
+ "step": 8
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00018,
+ "loss": 1.9907,
+ "step": 9
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0002,
+ "loss": 2.1295,
+ "step": 10
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999999287109068,
+ "loss": 2.2249,
+ "step": 11
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999997148436365,
+ "loss": 2.1733,
+ "step": 12
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.000199999935839822,
+ "loss": 2.1404,
+ "step": 13
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999988593747084,
+ "loss": 2.0236,
+ "step": 14
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999982177731722,
+ "loss": 1.9639,
+ "step": 15
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999974335937034,
+ "loss": 1.692,
+ "step": 16
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999965068364137,
+ "loss": 2.3609,
+ "step": 17
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999954375014348,
+ "loss": 2.3553,
+ "step": 18
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999942255889198,
+ "loss": 1.5733,
+ "step": 19
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999928710990412,
+ "loss": 1.7505,
+ "step": 20
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999913740319922,
+ "loss": 2.3068,
+ "step": 21
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999897343879862,
+ "loss": 1.8371,
+ "step": 22
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.0001999987952167257,
+ "loss": 1.9852,
+ "step": 23
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999860273700585,
+ "loss": 1.9625,
+ "step": 24
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999839599966655,
+ "loss": 2.1089,
+ "step": 25
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999817500473724,
+ "loss": 2.1086,
+ "step": 26
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999793975224945,
+ "loss": 2.0284,
+ "step": 27
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999769024223673,
+ "loss": 2.3641,
+ "step": 28
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999742647473464,
+ "loss": 1.963,
+ "step": 29
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999714844978078,
+ "loss": 2.0635,
+ "step": 30
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999968561674148,
+ "loss": 1.9304,
+ "step": 31
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999654962767839,
+ "loss": 1.4124,
+ "step": 32
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999622883061518,
+ "loss": 2.1444,
+ "step": 33
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999589377627102,
+ "loss": 1.6477,
+ "step": 34
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999955444646936,
+ "loss": 2.2601,
+ "step": 35
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999518089593282,
+ "loss": 1.6256,
+ "step": 36
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999948030700404,
+ "loss": 1.9155,
+ "step": 37
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999441098707025,
+ "loss": 2.1408,
+ "step": 38
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999400464707832,
+ "loss": 2.104,
+ "step": 39
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.0001999935840501225,
+ "loss": 1.9841,
+ "step": 40
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999314919626272,
+ "loss": 1.5924,
+ "step": 41
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999270008556108,
+ "loss": 1.9956,
+ "step": 42
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999223671808154,
+ "loss": 1.4673,
+ "step": 43
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999175909389018,
+ "loss": 2.1595,
+ "step": 44
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999126721305513,
+ "loss": 1.8439,
+ "step": 45
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999076107564648,
+ "loss": 1.9961,
+ "step": 46
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999024068173638,
+ "loss": 2.1504,
+ "step": 47
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998970603139912,
+ "loss": 2.2907,
+ "step": 48
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999891571247108,
+ "loss": 1.5709,
+ "step": 49
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999885939617498,
+ "loss": 2.4504,
+ "step": 50
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998801654259632,
+ "loss": 2.3787,
+ "step": 51
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999874248673328,
+ "loss": 2.0434,
+ "step": 52
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998681893604347,
+ "loss": 2.1671,
+ "step": 53
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999861987488148,
+ "loss": 1.7432,
+ "step": 54
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998556430573521,
+ "loss": 1.7737,
+ "step": 55
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998491560689513,
+ "loss": 2.0122,
+ "step": 56
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999842526523871,
+ "loss": 1.7545,
+ "step": 57
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998357544230558,
+ "loss": 2.201,
+ "step": 58
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998288397674716,
+ "loss": 2.0396,
+ "step": 59
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999821782558104,
+ "loss": 1.9275,
+ "step": 60
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998145827959598,
+ "loss": 1.7797,
+ "step": 61
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999807240482065,
+ "loss": 2.1463,
+ "step": 62
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997997556174665,
+ "loss": 1.935,
+ "step": 63
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999792128203232,
+ "loss": 2.1182,
+ "step": 64
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999784358240448,
+ "loss": 2.2297,
+ "step": 65
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997764457302234,
+ "loss": 2.1052,
+ "step": 66
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999768390673686,
+ "loss": 2.0777,
+ "step": 67
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997601930719835,
+ "loss": 2.1419,
+ "step": 68
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999751852926286,
+ "loss": 2.2586,
+ "step": 69
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997433702377817,
+ "loss": 1.9089,
+ "step": 70
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997347450076801,
+ "loss": 2.0587,
+ "step": 71
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997259772372116,
+ "loss": 2.4143,
+ "step": 72
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997170669276256,
+ "loss": 1.947,
+ "step": 73
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997080140801932,
+ "loss": 2.008,
+ "step": 74
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996988186962041,
+ "loss": 2.4912,
+ "step": 75
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996894807769707,
+ "loss": 2.0279,
+ "step": 76
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996800003238232,
+ "loss": 1.9914,
+ "step": 77
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.0001999670377338114,
+ "loss": 1.9091,
+ "step": 78
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996606118212148,
+ "loss": 1.8038,
+ "step": 79
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996507037745183,
+ "loss": 2.3573,
+ "step": 80
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996406531994364,
+ "loss": 2.3204,
+ "step": 81
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999630460097403,
+ "loss": 2.1619,
+ "step": 82
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999620124469871,
+ "loss": 1.9977,
+ "step": 83
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996096463183142,
+ "loss": 2.195,
+ "step": 84
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995990256442263,
+ "loss": 1.9909,
+ "step": 85
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995882624491217,
+ "loss": 2.2001,
+ "step": 86
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995773567345354,
+ "loss": 1.5795,
+ "step": 87
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995663085020212,
+ "loss": 2.174,
+ "step": 88
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995551177531557,
+ "loss": 1.9605,
+ "step": 89
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995437844895334,
+ "loss": 2.1768,
+ "step": 90
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999532308712771,
+ "loss": 1.6906,
+ "step": 91
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995206904245037,
+ "loss": 2.1029,
+ "step": 92
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995089296263893,
+ "loss": 2.0652,
+ "step": 93
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019994970263201035,
+ "loss": 2.1733,
+ "step": 94
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999484980507344,
+ "loss": 1.9413,
+ "step": 95
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999472792189828,
+ "loss": 1.9538,
+ "step": 96
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994604613692935,
+ "loss": 2.4158,
+ "step": 97
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994479880474988,
+ "loss": 1.8964,
+ "step": 98
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999435372226222,
+ "loss": 2.3135,
+ "step": 99
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999422613907262,
+ "loss": 2.127,
+ "step": 100
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994097130924374,
+ "loss": 1.9954,
+ "step": 101
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993966697835883,
+ "loss": 2.1363,
+ "step": 102
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993834839825738,
+ "loss": 1.7779,
+ "step": 103
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993701556912742,
+ "loss": 2.0923,
+ "step": 104
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993566849115898,
+ "loss": 1.9183,
+ "step": 105
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993430716454413,
+ "loss": 1.7894,
+ "step": 106
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993293158947694,
+ "loss": 2.0094,
+ "step": 107
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999315417661536,
+ "loss": 2.1469,
+ "step": 108
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999301376947722,
+ "loss": 1.6924,
+ "step": 109
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999287193755329,
+ "loss": 2.1794,
+ "step": 110
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.000199927286808638,
+ "loss": 2.1338,
+ "step": 111
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019992583999429178,
+ "loss": 1.9988,
+ "step": 112
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999243789327004,
+ "loss": 2.0735,
+ "step": 113
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999229036240723,
+ "loss": 2.0521,
+ "step": 114
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019992141406861776,
+ "loss": 1.9441,
+ "step": 115
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991991026654918,
+ "loss": 2.1244,
+ "step": 116
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999183922180809,
+ "loss": 1.7937,
+ "step": 117
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999168599234295,
+ "loss": 2.2603,
+ "step": 118
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991531338281332,
+ "loss": 2.1846,
+ "step": 119
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991375259645293,
+ "loss": 2.3241,
+ "step": 120
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991217756457085,
+ "loss": 2.0926,
+ "step": 121
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991058828739165,
+ "loss": 2.0092,
+ "step": 122
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990898476514193,
+ "loss": 1.8076,
+ "step": 123
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990736699805029,
+ "loss": 2.0369,
+ "step": 124
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990573498634742,
+ "loss": 2.0488,
+ "step": 125
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.000199904088730266,
+ "loss": 2.1534,
+ "step": 126
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990242823004074,
+ "loss": 2.1406,
+ "step": 127
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990075348590839,
+ "loss": 1.9379,
+ "step": 128
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019989906449810775,
+ "loss": 1.9781,
+ "step": 129
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989736126687963,
+ "loss": 1.973,
+ "step": 130
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989564379246683,
+ "loss": 1.6825,
+ "step": 131
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989391207511428,
+ "loss": 2.0843,
+ "step": 132
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989216611506887,
+ "loss": 1.8547,
+ "step": 133
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989040591257952,
+ "loss": 1.7626,
+ "step": 134
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.0001998886314678972,
+ "loss": 2.0531,
+ "step": 135
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988684278127497,
+ "loss": 2.0031,
+ "step": 136
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988503985296773,
+ "loss": 1.9342,
+ "step": 137
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988322268323268,
+ "loss": 2.3297,
+ "step": 138
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988139127232878,
+ "loss": 2.3401,
+ "step": 139
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987954562051725,
+ "loss": 1.8983,
+ "step": 140
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.0001998776857280612,
+ "loss": 2.0621,
+ "step": 141
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987581159522578,
+ "loss": 2.0574,
+ "step": 142
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987392322227824,
+ "loss": 1.9516,
+ "step": 143
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987202060948783,
+ "loss": 2.1402,
+ "step": 144
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987010375712577,
+ "loss": 1.8903,
+ "step": 145
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986817266546539,
+ "loss": 1.8248,
+ "step": 146
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986622733478204,
+ "loss": 1.9877,
+ "step": 147
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986426776535306,
+ "loss": 1.6272,
+ "step": 148
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986229395745785,
+ "loss": 1.8605,
+ "step": 149
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986030591137783,
+ "loss": 1.6848,
+ "step": 150
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985830362739647,
+ "loss": 2.1922,
+ "step": 151
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998562871057992,
+ "loss": 2.0238,
+ "step": 152
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998542563468736,
+ "loss": 2.2246,
+ "step": 153
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985221135090914,
+ "loss": 1.9438,
+ "step": 154
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019985015211819744,
+ "loss": 2.2136,
+ "step": 155
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998480786490321,
+ "loss": 2.4563,
+ "step": 156
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984599094370874,
+ "loss": 2.2138,
+ "step": 157
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984388900252503,
+ "loss": 2.2679,
+ "step": 158
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984177282578064,
+ "loss": 1.9537,
+ "step": 159
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998396424137773,
+ "loss": 2.0803,
+ "step": 160
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998374977668188,
+ "loss": 2.0282,
+ "step": 161
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019983533888521087,
+ "loss": 2.0157,
+ "step": 162
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998331657692613,
+ "loss": 1.7837,
+ "step": 163
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019983097841928,
+ "loss": 2.1556,
+ "step": 164
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982877683557879,
+ "loss": 2.1447,
+ "step": 165
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982656101847162,
+ "loss": 2.4139,
+ "step": 166
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998243309682743,
+ "loss": 1.6788,
+ "step": 167
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982208668530493,
+ "loss": 1.9008,
+ "step": 168
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998198281698834,
+ "loss": 2.173,
+ "step": 169
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019981755542233177,
+ "loss": 2.1837,
+ "step": 170
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981526844297404,
+ "loss": 2.0639,
+ "step": 171
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981296723213632,
+ "loss": 2.3864,
+ "step": 172
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981065179014673,
+ "loss": 1.923,
+ "step": 173
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980832211733535,
+ "loss": 1.9192,
+ "step": 174
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980597821403438,
+ "loss": 2.0335,
+ "step": 175
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.000199803620080578,
+ "loss": 1.8172,
+ "step": 176
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.0001998012477173024,
+ "loss": 2.0294,
+ "step": 177
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979886112454586,
+ "loss": 2.2889,
+ "step": 178
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979646030264867,
+ "loss": 1.8498,
+ "step": 179
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997940452519531,
+ "loss": 2.0797,
+ "step": 180
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997916159728035,
+ "loss": 2.2356,
+ "step": 181
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997891724655462,
+ "loss": 2.1187,
+ "step": 182
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978671473052964,
+ "loss": 1.9301,
+ "step": 183
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978424276810423,
+ "loss": 1.8582,
+ "step": 184
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997817565786224,
+ "loss": 2.144,
+ "step": 185
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977925616243862,
+ "loss": 2.0595,
+ "step": 186
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977674151990945,
+ "loss": 1.9104,
+ "step": 187
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977421265139332,
+ "loss": 1.9727,
+ "step": 188
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977166955725088,
+ "loss": 1.8727,
+ "step": 189
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997691122378447,
+ "loss": 2.0611,
+ "step": 190
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997665406935394,
+ "loss": 2.0745,
+ "step": 191
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997639549247016,
+ "loss": 1.9974,
+ "step": 192
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019976135493169996,
+ "loss": 1.9856,
+ "step": 193
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975874071490526,
+ "loss": 1.778,
+ "step": 194
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975611227469016,
+ "loss": 1.8347,
+ "step": 195
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997534696114294,
+ "loss": 1.5555,
+ "step": 196
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019975081272549989,
+ "loss": 1.5625,
+ "step": 197
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974814161728032,
+ "loss": 1.9997,
+ "step": 198
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974545628715157,
+ "loss": 1.9523,
+ "step": 199
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974275673549654,
+ "loss": 2.1557,
+ "step": 200
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974004296270006,
+ "loss": 1.8306,
+ "step": 201
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973731496914914,
+ "loss": 2.0051,
+ "step": 202
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973457275523264,
+ "loss": 2.201,
+ "step": 203
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997318163213416,
+ "loss": 2.2446,
+ "step": 204
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972904566786903,
+ "loss": 2.1172,
+ "step": 205
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972626079520995,
+ "loss": 1.9849,
+ "step": 206
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972346170376142,
+ "loss": 1.9774,
+ "step": 207
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.0001997206483939225,
+ "loss": 1.7625,
+ "step": 208
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971782086609436,
+ "loss": 2.2346,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 2.00066876411438,
+ "eval_runtime": 282.7648,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971497912068013,
+ "loss": 2.4185,
+ "step": 210
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971212315808497,
+ "loss": 1.946,
+ "step": 211
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019970925297871605,
+ "loss": 2.0049,
+ "step": 212
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970636858298267,
+ "loss": 1.9545,
+ "step": 213
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970346997129598,
+ "loss": 1.9636,
+ "step": 214
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970055714406938,
+ "loss": 1.9068,
+ "step": 215
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969763010171807,
+ "loss": 1.5749,
+ "step": 216
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969468884465942,
+ "loss": 1.7676,
+ "step": 217
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996917333733128,
+ "loss": 2.0329,
+ "step": 218
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996887636880996,
+ "loss": 1.9307,
+ "step": 219
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019968577978944323,
+ "loss": 2.134,
+ "step": 220
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019968278167776908,
+ "loss": 2.0911,
+ "step": 221
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967976935350467,
+ "loss": 2.5057,
+ "step": 222
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.0001996767428170795,
+ "loss": 1.9267,
+ "step": 223
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967370206892503,
+ "loss": 2.3569,
+ "step": 224
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967064710947488,
+ "loss": 1.992,
+ "step": 225
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966757793916454,
+ "loss": 2.01,
+ "step": 226
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966449455843165,
+ "loss": 1.8037,
+ "step": 227
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966139696771587,
+ "loss": 2.2498,
+ "step": 228
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019965828516745876,
+ "loss": 1.6563,
+ "step": 229
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996551591581041,
+ "loss": 1.979,
+ "step": 230
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996520189400975,
+ "loss": 2.1553,
+ "step": 231
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996488645138867,
+ "loss": 1.8743,
+ "step": 232
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964569587992148,
+ "loss": 2.1907,
+ "step": 233
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964251303865362,
+ "loss": 2.0644,
+ "step": 234
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019963931599053692,
+ "loss": 2.1721,
+ "step": 235
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996361047360272,
+ "loss": 2.2267,
+ "step": 236
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996328792755823,
+ "loss": 1.9445,
+ "step": 237
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019962963960966213,
+ "loss": 2.2003,
+ "step": 238
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996263857387286,
+ "loss": 2.3114,
+ "step": 239
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996231176632456,
+ "loss": 1.8553,
+ "step": 240
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961983538367914,
+ "loss": 2.1349,
+ "step": 241
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961653890049715,
+ "loss": 1.8784,
+ "step": 242
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996132282141697,
+ "loss": 2.0118,
+ "step": 243
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960990332516874,
+ "loss": 1.9938,
+ "step": 244
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960656423396834,
+ "loss": 2.2582,
+ "step": 245
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019960321094104465,
+ "loss": 2.1807,
+ "step": 246
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959984344687578,
+ "loss": 1.9084,
+ "step": 247
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959646175194174,
+ "loss": 2.2879,
+ "step": 248
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995930658567248,
+ "loss": 1.942,
+ "step": 249
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958965576170908,
+ "loss": 2.1313,
+ "step": 250
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958623146738088,
+ "loss": 2.3202,
+ "step": 251
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995827929742283,
+ "loss": 1.7832,
+ "step": 252
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957934028274162,
+ "loss": 1.7103,
+ "step": 253
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957587339341321,
+ "loss": 1.9912,
+ "step": 254
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995723923067373,
+ "loss": 1.6686,
+ "step": 255
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956889702321023,
+ "loss": 1.966,
+ "step": 256
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956538754333034,
+ "loss": 2.2287,
+ "step": 257
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956186386759804,
+ "loss": 1.4866,
+ "step": 258
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995583259965157,
+ "loss": 1.9599,
+ "step": 259
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019955477393058773,
+ "loss": 1.9273,
+ "step": 260
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995512076703206,
+ "loss": 1.847,
+ "step": 261
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019954762721622279,
+ "loss": 2.0535,
+ "step": 262
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995440325688048,
+ "loss": 2.4403,
+ "step": 263
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019954042372857908,
+ "loss": 1.8712,
+ "step": 264
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953680069606026,
+ "loss": 2.1837,
+ "step": 265
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953316347176488,
+ "loss": 2.0398,
+ "step": 266
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995295120562115,
+ "loss": 2.1135,
+ "step": 267
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952584644992075,
+ "loss": 2.0358,
+ "step": 268
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952216665341526,
+ "loss": 2.3282,
+ "step": 269
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995184726672197,
+ "loss": 1.9741,
+ "step": 270
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951476449186074,
+ "loss": 1.7523,
+ "step": 271
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951104212786712,
+ "loss": 2.1509,
+ "step": 272
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001995073055757695,
+ "loss": 2.0865,
+ "step": 273
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019950355483610067,
+ "loss": 1.8972,
+ "step": 274
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019949978990939542,
+ "loss": 2.4693,
+ "step": 275
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994960107961905,
+ "loss": 1.9307,
+ "step": 276
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994922174970248,
+ "loss": 2.0097,
+ "step": 277
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994884100124391,
+ "loss": 1.6561,
+ "step": 278
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994845883429763,
+ "loss": 2.3069,
+ "step": 279
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019948075248918124,
+ "loss": 2.0134,
+ "step": 280
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947690245160091,
+ "loss": 2.1061,
+ "step": 281
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947303823078416,
+ "loss": 2.0855,
+ "step": 282
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946915982728197,
+ "loss": 1.5672,
+ "step": 283
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.0001994652672416473,
+ "loss": 1.7289,
+ "step": 284
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946136047443522,
+ "loss": 1.9013,
+ "step": 285
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945743952620268,
+ "loss": 2.3105,
+ "step": 286
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945350439750872,
+ "loss": 2.341,
+ "step": 287
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944955508891443,
+ "loss": 1.88,
+ "step": 288
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994455916009829,
+ "loss": 1.913,
+ "step": 289
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944161393427922,
+ "loss": 1.9513,
+ "step": 290
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943762208937053,
+ "loss": 2.3331,
+ "step": 291
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943361606682597,
+ "loss": 2.3024,
+ "step": 292
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942959586721672,
+ "loss": 2.2222,
+ "step": 293
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942556149111598,
+ "loss": 2.1003,
+ "step": 294
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994215129390989,
+ "loss": 1.9038,
+ "step": 295
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941745021174282,
+ "loss": 1.6068,
+ "step": 296
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941337330962693,
+ "loss": 1.8894,
+ "step": 297
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940928223333252,
+ "loss": 2.3158,
+ "step": 298
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.0001994051769834429,
+ "loss": 2.1015,
+ "step": 299
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940105756054337,
+ "loss": 2.1519,
+ "step": 300
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939692396522127,
+ "loss": 1.7233,
+ "step": 301
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939277619806598,
+ "loss": 1.85,
+ "step": 302
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938861425966887,
+ "loss": 2.2368,
+ "step": 303
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938443815062335,
+ "loss": 1.765,
+ "step": 304
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993802478715248,
+ "loss": 1.6333,
+ "step": 305
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937604342297073,
+ "loss": 2.191,
+ "step": 306
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937182480556055,
+ "loss": 2.2402,
+ "step": 307
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019936759201989577,
+ "loss": 2.0568,
+ "step": 308
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993633450665799,
+ "loss": 2.4314,
+ "step": 309
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935908394621844,
+ "loss": 2.0556,
+ "step": 310
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935480865941894,
+ "loss": 2.0988,
+ "step": 311
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935051920679094,
+ "loss": 2.0964,
+ "step": 312
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019934621558894607,
+ "loss": 1.9365,
+ "step": 313
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993418978064979,
+ "loss": 1.6224,
+ "step": 314
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933756586006202,
+ "loss": 2.144,
+ "step": 315
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933321975025616,
+ "loss": 2.2899,
+ "step": 316
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019932885947769992,
+ "loss": 1.8865,
+ "step": 317
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.000199324485043015,
+ "loss": 2.3996,
+ "step": 318
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993200964468251,
+ "loss": 1.3858,
+ "step": 319
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019931569368975588,
+ "loss": 2.2231,
+ "step": 320
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019931127677243516,
+ "loss": 2.0537,
+ "step": 321
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930684569549264,
+ "loss": 2.1381,
+ "step": 322
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930240045956012,
+ "loss": 2.0152,
+ "step": 323
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992979410652714,
+ "loss": 2.0293,
+ "step": 324
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019929346751326228,
+ "loss": 1.7457,
+ "step": 325
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928897980417057,
+ "loss": 1.987,
+ "step": 326
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928447793863616,
+ "loss": 2.2451,
+ "step": 327
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019927996191730093,
+ "loss": 2.3312,
+ "step": 328
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992754317408087,
+ "loss": 1.8771,
+ "step": 329
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992708874098054,
+ "loss": 1.833,
+ "step": 330
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926632892493896,
+ "loss": 1.9343,
+ "step": 331
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926175628685937,
+ "loss": 2.2328,
+ "step": 332
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992571694962185,
+ "loss": 1.9916,
+ "step": 333
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992525685536704,
+ "loss": 1.9497,
+ "step": 334
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.000199247953459871,
+ "loss": 2.029,
+ "step": 335
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019924332421547835,
+ "loss": 2.0326,
+ "step": 336
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992386808211525,
+ "loss": 2.6406,
+ "step": 337
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019923402327755546,
+ "loss": 2.3811,
+ "step": 338
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922935158535129,
+ "loss": 1.6143,
+ "step": 339
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922466574520608,
+ "loss": 2.2182,
+ "step": 340
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921996575778794,
+ "loss": 2.218,
+ "step": 341
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.000199215251623767,
+ "loss": 1.8615,
+ "step": 342
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921052334381534,
+ "loss": 2.165,
+ "step": 343
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019920578091860716,
+ "loss": 2.1627,
+ "step": 344
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0001992010243488186,
+ "loss": 2.154,
+ "step": 345
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919625363512786,
+ "loss": 1.5966,
+ "step": 346
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919146877821512,
+ "loss": 2.0903,
+ "step": 347
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991866697787626,
+ "loss": 2.2322,
+ "step": 348
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019918185663745456,
+ "loss": 1.9319,
+ "step": 349
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917702935497725,
+ "loss": 2.1367,
+ "step": 350
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917218793201886,
+ "loss": 2.1767,
+ "step": 351
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019916733236926976,
+ "loss": 2.1009,
+ "step": 352
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991624626674222,
+ "loss": 2.1286,
+ "step": 353
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991575788271705,
+ "loss": 2.181,
+ "step": 354
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019915268084921101,
+ "loss": 2.12,
+ "step": 355
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019914776873424206,
+ "loss": 1.9895,
+ "step": 356
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.000199142842482964,
+ "loss": 1.9285,
+ "step": 357
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991379020960792,
+ "loss": 2.2376,
+ "step": 358
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991329475742921,
+ "loss": 2.1274,
+ "step": 359
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912797891830908,
+ "loss": 2.0043,
+ "step": 360
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912299612883852,
+ "loss": 2.022,
+ "step": 361
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019911799920659093,
+ "loss": 1.7343,
+ "step": 362
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991129881522787,
+ "loss": 2.0621,
+ "step": 363
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019910796296661632,
+ "loss": 1.5116,
+ "step": 364
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991029236503203,
+ "loss": 2.0485,
+ "step": 365
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909787020410907,
+ "loss": 1.971,
+ "step": 366
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909280262870324,
+ "loss": 1.9724,
+ "step": 367
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908772092482524,
+ "loss": 1.318,
+ "step": 368
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908262509319964,
+ "loss": 2.0539,
+ "step": 369
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019907751513455302,
+ "loss": 2.1097,
+ "step": 370
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019907239104961392,
+ "loss": 2.0632,
+ "step": 371
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906725283911296,
+ "loss": 2.1897,
+ "step": 372
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906210050378266,
+ "loss": 2.2002,
+ "step": 373
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905693404435773,
+ "loss": 1.9005,
+ "step": 374
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905175346157474,
+ "loss": 1.9873,
+ "step": 375
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904655875617233,
+ "loss": 1.7215,
+ "step": 376
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904134992889113,
+ "loss": 2.0434,
+ "step": 377
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903612698047383,
+ "loss": 2.4223,
+ "step": 378
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903088991166513,
+ "loss": 2.0837,
+ "step": 379
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902563872321172,
+ "loss": 2.2389,
+ "step": 380
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902037341586225,
+ "loss": 1.7205,
+ "step": 381
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001990150939903675,
+ "loss": 1.9577,
+ "step": 382
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019900980044748015,
+ "loss": 1.8778,
+ "step": 383
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.000199004492787955,
+ "loss": 2.2213,
+ "step": 384
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899917101254874,
+ "loss": 2.0927,
+ "step": 385
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899383512202019,
+ "loss": 2.2921,
+ "step": 386
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001989884851171301,
+ "loss": 2.2983,
+ "step": 387
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989831209986413,
+ "loss": 1.8052,
+ "step": 388
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897774276731857,
+ "loss": 1.7741,
+ "step": 389
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897235042392873,
+ "loss": 1.779,
+ "step": 390
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896694396924063,
+ "loss": 1.6924,
+ "step": 391
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896152340402509,
+ "loss": 2.036,
+ "step": 392
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019895608872905494,
+ "loss": 2.04,
+ "step": 393
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989506399451051,
+ "loss": 2.1702,
+ "step": 394
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019894517705295245,
+ "loss": 1.9429,
+ "step": 395
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893970005337584,
+ "loss": 2.0528,
+ "step": 396
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893420894715618,
+ "loss": 1.7906,
+ "step": 397
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989287037350764,
+ "loss": 2.3494,
+ "step": 398
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019892318441792138,
+ "loss": 1.7415,
+ "step": 399
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989176509964781,
+ "loss": 2.0184,
+ "step": 400
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989121034715355,
+ "loss": 1.9277,
+ "step": 401
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989065418438845,
+ "loss": 2.2168,
+ "step": 402
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019890096611431814,
+ "loss": 2.6114,
+ "step": 403
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019889537628363133,
+ "loss": 2.0713,
+ "step": 404
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888977235262104,
+ "loss": 2.2966,
+ "step": 405
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888415432208636,
+ "loss": 2.5206,
+ "step": 406
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887852219282822,
+ "loss": 2.4503,
+ "step": 407
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887287596564966,
+ "loss": 2.102,
+ "step": 408
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886721564135572,
+ "loss": 2.3275,
+ "step": 409
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886154122075343,
+ "loss": 2.0481,
+ "step": 410
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885585270465182,
+ "loss": 1.8395,
+ "step": 411
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885015009386202,
+ "loss": 2.3535,
+ "step": 412
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.000198844433389197,
+ "loss": 2.0147,
+ "step": 413
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988387025914719,
+ "loss": 2.1919,
+ "step": 414
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988329577015038,
+ "loss": 2.156,
+ "step": 415
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882719872011176,
+ "loss": 2.2672,
+ "step": 416
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882142564811694,
+ "loss": 2.3242,
+ "step": 417
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988156384863424,
+ "loss": 2.0259,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "eval_loss": 1.9941134452819824,
+ "eval_runtime": 282.533,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880983723561332,
+ "loss": 1.7039,
+ "step": 419
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880402189675678,
+ "loss": 2.1007,
+ "step": 420
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879819247060193,
+ "loss": 2.2297,
+ "step": 421
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879234895797996,
+ "loss": 1.6166,
+ "step": 422
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.000198786491359724,
+ "loss": 2.408,
+ "step": 423
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019878061967666915,
+ "loss": 1.686,
+ "step": 424
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0001987747339096527,
+ "loss": 2.0492,
+ "step": 425
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876883405951377,
+ "loss": 2.2179,
+ "step": 426
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876292012709356,
+ "loss": 1.8812,
+ "step": 427
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019875699211323528,
+ "loss": 2.2888,
+ "step": 428
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019875105001878409,
+ "loss": 2.0561,
+ "step": 429
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019874509384458725,
+ "loss": 1.9299,
+ "step": 430
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873912359149397,
+ "loss": 2.1999,
+ "step": 431
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873313926035548,
+ "loss": 1.8509,
+ "step": 432
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019872714085202503,
+ "loss": 1.8281,
+ "step": 433
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987211283673578,
+ "loss": 1.8359,
+ "step": 434
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987151018072111,
+ "loss": 2.2844,
+ "step": 435
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870906117244416,
+ "loss": 1.9397,
+ "step": 436
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870300646391824,
+ "loss": 2.302,
+ "step": 437
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869693768249661,
+ "loss": 2.1176,
+ "step": 438
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869085482904458,
+ "loss": 2.1909,
+ "step": 439
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986847579044294,
+ "loss": 2.2382,
+ "step": 440
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867864690952035,
+ "loss": 2.0988,
+ "step": 441
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867252184518878,
+ "loss": 2.2136,
+ "step": 442
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986663827123079,
+ "loss": 1.9324,
+ "step": 443
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019866022951175308,
+ "loss": 2.1274,
+ "step": 444
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019865406224440165,
+ "loss": 1.8625,
+ "step": 445
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019864788091113287,
+ "loss": 2.0009,
+ "step": 446
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0001986416855128281,
+ "loss": 2.2245,
+ "step": 447
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019863547605037063,
+ "loss": 2.0654,
+ "step": 448
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862925252464586,
+ "loss": 1.4339,
+ "step": 449
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862301493654108,
+ "loss": 2.1347,
+ "step": 450
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861676328694562,
+ "loss": 1.7029,
+ "step": 451
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861049757675088,
+ "loss": 2.0081,
+ "step": 452
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019860421780685018,
+ "loss": 1.9994,
+ "step": 453
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985979239781389,
+ "loss": 1.9325,
+ "step": 454
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019859161609151436,
+ "loss": 1.8502,
+ "step": 455
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.000198585294147876,
+ "loss": 2.3779,
+ "step": 456
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019857895814812509,
+ "loss": 2.0303,
+ "step": 457
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985726080931651,
+ "loss": 1.9898,
+ "step": 458
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019856624398390137,
+ "loss": 1.7648,
+ "step": 459
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019855986582124126,
+ "loss": 1.7822,
+ "step": 460
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985534736060942,
+ "loss": 1.9219,
+ "step": 461
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019854706733937155,
+ "loss": 2.1789,
+ "step": 462
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019854064702198675,
+ "loss": 1.9091,
+ "step": 463
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019853421265485514,
+ "loss": 1.9941,
+ "step": 464
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985277642388941,
+ "loss": 1.904,
+ "step": 465
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019852130177502316,
+ "loss": 1.6299,
+ "step": 466
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985148252641636,
+ "loss": 1.7712,
+ "step": 467
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019850833470723886,
+ "loss": 1.6825,
+ "step": 468
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985018301051744,
+ "loss": 1.7408,
+ "step": 469
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019849531145889758,
+ "loss": 2.0622,
+ "step": 470
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019848877876933784,
+ "loss": 1.5699,
+ "step": 471
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0001984822320374266,
+ "loss": 2.0253,
+ "step": 472
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019847567126409724,
+ "loss": 2.2186,
+ "step": 473
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019846909645028523,
+ "loss": 2.0872,
+ "step": 474
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.000198462507596928,
+ "loss": 1.9362,
+ "step": 475
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019845590470496497,
+ "loss": 2.4109,
+ "step": 476
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844928777533753,
+ "loss": 2.2626,
+ "step": 477
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844265680898918,
+ "loss": 2.0874,
+ "step": 478
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984360118068653,
+ "loss": 2.1606,
+ "step": 479
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984293527699133,
+ "loss": 2.063,
+ "step": 480
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019842267969908265,
+ "loss": 1.9065,
+ "step": 481
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984159925953248,
+ "loss": 1.9511,
+ "step": 482
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840929145959317,
+ "loss": 2.056,
+ "step": 483
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840257629284317,
+ "loss": 2.2353,
+ "step": 484
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019839584709603226,
+ "loss": 1.9401,
+ "step": 485
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001983891038701199,
+ "loss": 1.9648,
+ "step": 486
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019838234661606748,
+ "loss": 1.753,
+ "step": 487
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019837557533483846,
+ "loss": 1.7805,
+ "step": 488
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836879002739827,
+ "loss": 2.192,
+ "step": 489
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836199069471437,
+ "loss": 1.9112,
+ "step": 490
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019835517733775615,
+ "loss": 2.0119,
+ "step": 491
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0001983483499574951,
+ "loss": 1.8932,
+ "step": 492
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019834150855490464,
+ "loss": 1.5968,
+ "step": 493
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019833465313096017,
+ "loss": 2.1493,
+ "step": 494
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019832778368663917,
+ "loss": 1.8863,
+ "step": 495
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.000198320900222921,
+ "loss": 2.2134,
+ "step": 496
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019831400274078717,
+ "loss": 2.2831,
+ "step": 497
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019830709124122112,
+ "loss": 2.0266,
+ "step": 498
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001983001657252082,
+ "loss": 2.3392,
+ "step": 499
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019829322619373588,
+ "loss": 1.8426,
+ "step": 500
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019828627264779363,
+ "loss": 2.0742,
+ "step": 501
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001982793050883728,
+ "loss": 1.9578,
+ "step": 502
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019827232351646686,
+ "loss": 2.0863,
+ "step": 503
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982653279330712,
+ "loss": 2.2881,
+ "step": 504
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019825831833918323,
+ "loss": 1.8869,
+ "step": 505
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982512947358024,
+ "loss": 1.8997,
+ "step": 506
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019824425712393012,
+ "loss": 1.8945,
+ "step": 507
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019823720550456977,
+ "loss": 1.9496,
+ "step": 508
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982301398787268,
+ "loss": 2.1066,
+ "step": 509
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019822306024740852,
+ "loss": 1.958,
+ "step": 510
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019821596661162447,
+ "loss": 2.1112,
+ "step": 511
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019820885897238596,
+ "loss": 2.1012,
+ "step": 512
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001982017373307064,
+ "loss": 2.2623,
+ "step": 513
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019819460168760117,
+ "loss": 2.5058,
+ "step": 514
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981874520440877,
+ "loss": 2.1367,
+ "step": 515
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019818028840118532,
+ "loss": 2.2743,
+ "step": 516
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019817311075991543,
+ "loss": 1.5517,
+ "step": 517
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981659191213014,
+ "loss": 1.9569,
+ "step": 518
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815871348636863,
+ "loss": 2.0566,
+ "step": 519
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815149385614444,
+ "loss": 1.8859,
+ "step": 520
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019814426023165825,
+ "loss": 2.0298,
+ "step": 521
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019813701261394136,
+ "loss": 2.0614,
+ "step": 522
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812975100402715,
+ "loss": 2.221,
+ "step": 523
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812247540295096,
+ "loss": 2.1255,
+ "step": 524
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019811518581175014,
+ "loss": 2.1885,
+ "step": 525
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.000198107882231464,
+ "loss": 2.3918,
+ "step": 526
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019810056466313392,
+ "loss": 2.2759,
+ "step": 527
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019809323310780318,
+ "loss": 1.9727,
+ "step": 528
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980858875665171,
+ "loss": 2.0417,
+ "step": 529
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019807852804032305,
+ "loss": 1.645,
+ "step": 530
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980711545302703,
+ "loss": 1.7943,
+ "step": 531
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019806376703741015,
+ "loss": 1.8844,
+ "step": 532
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019805636556279588,
+ "loss": 2.1128,
+ "step": 533
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980489501074828,
+ "loss": 2.0272,
+ "step": 534
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019804152067252816,
+ "loss": 2.0916,
+ "step": 535
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019803407725899131,
+ "loss": 1.7287,
+ "step": 536
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019802661986793342,
+ "loss": 2.0667,
+ "step": 537
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801914850041784,
+ "loss": 2.4016,
+ "step": 538
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801166315750978,
+ "loss": 1.8557,
+ "step": 539
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0001980041638402765,
+ "loss": 1.8072,
+ "step": 540
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019799665054978722,
+ "loss": 2.2252,
+ "step": 541
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798912328711322,
+ "loss": 2.1377,
+ "step": 542
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798158205332764,
+ "loss": 2.0306,
+ "step": 543
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019797402684950576,
+ "loss": 1.7428,
+ "step": 544
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019796645767672477,
+ "loss": 2.0843,
+ "step": 545
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795887453606388,
+ "loss": 1.9175,
+ "step": 546
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795127742860423,
+ "loss": 1.6673,
+ "step": 547
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0001979436663554291,
+ "loss": 1.5553,
+ "step": 548
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019793604131762357,
+ "loss": 1.604,
+ "step": 549
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792840231627482,
+ "loss": 2.023,
+ "step": 550
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792074935247206,
+ "loss": 1.8399,
+ "step": 551
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019791308242730638,
+ "loss": 1.8579,
+ "step": 552
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019790540154187094,
+ "loss": 2.2135,
+ "step": 553
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019789770669726087,
+ "loss": 1.7894,
+ "step": 554
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788999789457326,
+ "loss": 2.1723,
+ "step": 555
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788227513490723,
+ "loss": 2.0881,
+ "step": 556
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019787453841936393,
+ "loss": 1.7181,
+ "step": 557
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019786678774904638,
+ "loss": 1.8725,
+ "step": 558
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785902312505964,
+ "loss": 2.0544,
+ "step": 559
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785124454851084,
+ "loss": 1.7503,
+ "step": 560
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.000197843452020509,
+ "loss": 2.01,
+ "step": 561
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019783564554216518,
+ "loss": 1.748,
+ "step": 562
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978278251145924,
+ "loss": 2.0866,
+ "step": 563
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978199907389057,
+ "loss": 1.6046,
+ "step": 564
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019781214241622208,
+ "loss": 1.9222,
+ "step": 565
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019780428014766051,
+ "loss": 2.2003,
+ "step": 566
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019779640393434206,
+ "loss": 2.0534,
+ "step": 567
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001977885137773896,
+ "loss": 1.8609,
+ "step": 568
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019778060967792817,
+ "loss": 2.0666,
+ "step": 569
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019777269163708468,
+ "loss": 1.9512,
+ "step": 570
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019776475965598814,
+ "loss": 1.8349,
+ "step": 571
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977568137357694,
+ "loss": 2.0507,
+ "step": 572
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019774885387756138,
+ "loss": 1.7588,
+ "step": 573
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.000197740880082499,
+ "loss": 2.0981,
+ "step": 574
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019773289235171918,
+ "loss": 2.0953,
+ "step": 575
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019772489068636077,
+ "loss": 2.0678,
+ "step": 576
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019771687508756466,
+ "loss": 2.0136,
+ "step": 577
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977088455564736,
+ "loss": 1.9781,
+ "step": 578
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019770080209423254,
+ "loss": 2.2185,
+ "step": 579
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019769274470198827,
+ "loss": 1.8076,
+ "step": 580
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019768467338088957,
+ "loss": 1.6888,
+ "step": 581
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019767658813208726,
+ "loss": 2.1273,
+ "step": 582
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0001976684889567341,
+ "loss": 2.3232,
+ "step": 583
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019766037585598487,
+ "loss": 2.366,
+ "step": 584
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019765224883099635,
+ "loss": 1.8939,
+ "step": 585
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019764410788292722,
+ "loss": 2.0162,
+ "step": 586
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019763595301293822,
+ "loss": 2.2752,
+ "step": 587
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976277842221921,
+ "loss": 1.9461,
+ "step": 588
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976196015118535,
+ "loss": 1.9999,
+ "step": 589
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976114048830891,
+ "loss": 2.0169,
+ "step": 590
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019760319433706757,
+ "loss": 2.1838,
+ "step": 591
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019759496987495955,
+ "loss": 2.3513,
+ "step": 592
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975867314979377,
+ "loss": 1.9915,
+ "step": 593
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975784792071766,
+ "loss": 2.1973,
+ "step": 594
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019757021300385286,
+ "loss": 2.3112,
+ "step": 595
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019756193288914507,
+ "loss": 2.0992,
+ "step": 596
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019755363886423376,
+ "loss": 2.4266,
+ "step": 597
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019754533093030148,
+ "loss": 1.7649,
+ "step": 598
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975370090885328,
+ "loss": 1.7573,
+ "step": 599
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019752867334011423,
+ "loss": 1.7949,
+ "step": 600
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975203236862342,
+ "loss": 2.0229,
+ "step": 601
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019751196012808325,
+ "loss": 2.0519,
+ "step": 602
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019750358266685383,
+ "loss": 2.0829,
+ "step": 603
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019749519130374038,
+ "loss": 2.0153,
+ "step": 604
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019748678603993933,
+ "loss": 1.8594,
+ "step": 605
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019747836687664908,
+ "loss": 2.1385,
+ "step": 606
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746993381507003,
+ "loss": 2.1317,
+ "step": 607
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746148685640451,
+ "loss": 1.1676,
+ "step": 608
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974530260018569,
+ "loss": 2.2856,
+ "step": 609
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974445512526336,
+ "loss": 2.1973,
+ "step": 610
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019743606260994278,
+ "loss": 1.6912,
+ "step": 611
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019742756007499486,
+ "loss": 1.8091,
+ "step": 612
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741904364900208,
+ "loss": 2.0108,
+ "step": 613
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741051333317867,
+ "loss": 2.1061,
+ "step": 614
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019740196912874087,
+ "loss": 1.8934,
+ "step": 615
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019739341103690693,
+ "loss": 1.8599,
+ "step": 616
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019738483905889703,
+ "loss": 2.0025,
+ "step": 617
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019737625319593335,
+ "loss": 1.8247,
+ "step": 618
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019736765344924005,
+ "loss": 2.222,
+ "step": 619
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019735903982004324,
+ "loss": 2.116,
+ "step": 620
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973504123095711,
+ "loss": 1.9183,
+ "step": 621
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973417709190536,
+ "loss": 2.1507,
+ "step": 622
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019733311564972296,
+ "loss": 1.7899,
+ "step": 623
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019732444650281315,
+ "loss": 2.1005,
+ "step": 624
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973157634795602,
+ "loss": 2.2391,
+ "step": 625
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019730706658120214,
+ "loss": 1.9466,
+ "step": 626
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.000197298355808979,
+ "loss": 1.9854,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "eval_loss": 1.9957869052886963,
+ "eval_runtime": 282.5544,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019728963116413266,
+ "loss": 2.1877,
+ "step": 628
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019728089264790712,
+ "loss": 2.2194,
+ "step": 629
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019727214026154827,
+ "loss": 1.9631,
+ "step": 630
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019726337400630405,
+ "loss": 2.3506,
+ "step": 631
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019725459388342432,
+ "loss": 2.0543,
+ "step": 632
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972457998941609,
+ "loss": 2.0402,
+ "step": 633
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019723699203976766,
+ "loss": 1.9316,
+ "step": 634
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972281703215004,
+ "loss": 2.2024,
+ "step": 635
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721933474061692,
+ "loss": 1.6776,
+ "step": 636
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721048529837694,
+ "loss": 1.9757,
+ "step": 637
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019720162199604222,
+ "loss": 1.7631,
+ "step": 638
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019719274483487648,
+ "loss": 2.34,
+ "step": 639
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.0001971838538161454,
+ "loss": 1.8469,
+ "step": 640
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019717494894111662,
+ "loss": 2.3151,
+ "step": 641
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019716603021105987,
+ "loss": 2.0661,
+ "step": 642
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019715709762724667,
+ "loss": 2.0408,
+ "step": 643
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019714815119095062,
+ "loss": 1.9848,
+ "step": 644
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019713919090344736,
+ "loss": 2.3134,
+ "step": 645
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019713021676601438,
+ "loss": 2.4947,
+ "step": 646
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001971212287799312,
+ "loss": 2.0515,
+ "step": 647
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019711222694647932,
+ "loss": 2.6216,
+ "step": 648
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019710321126694216,
+ "loss": 1.6517,
+ "step": 649
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001970941817426052,
+ "loss": 2.0408,
+ "step": 650
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019708513837475588,
+ "loss": 1.8841,
+ "step": 651
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019707608116468356,
+ "loss": 2.1966,
+ "step": 652
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019706701011367955,
+ "loss": 1.7587,
+ "step": 653
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970579252230373,
+ "loss": 2.2196,
+ "step": 654
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019704882649405198,
+ "loss": 1.8146,
+ "step": 655
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703971392802098,
+ "loss": 2.2932,
+ "step": 656
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703058752624353,
+ "loss": 1.923,
+ "step": 657
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970214472900208,
+ "loss": 2.2393,
+ "step": 658
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019701229322065605,
+ "loss": 1.7338,
+ "step": 659
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019700312531945442,
+ "loss": 1.7859,
+ "step": 660
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019699394358772306,
+ "loss": 2.2719,
+ "step": 661
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019698474802677107,
+ "loss": 1.576,
+ "step": 662
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019697553863790956,
+ "loss": 2.3333,
+ "step": 663
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019696631542245156,
+ "loss": 2.3508,
+ "step": 664
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019695707838171216,
+ "loss": 2.1876,
+ "step": 665
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019694782751700828,
+ "loss": 1.4863,
+ "step": 666
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019693856282965898,
+ "loss": 1.8948,
+ "step": 667
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019692928432098512,
+ "loss": 1.6867,
+ "step": 668
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691999199230963,
+ "loss": 1.7682,
+ "step": 669
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691068584495742,
+ "loss": 2.0914,
+ "step": 670
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019690136588025535,
+ "loss": 2.1413,
+ "step": 671
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019689203209953223,
+ "loss": 2.1275,
+ "step": 672
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.0001968826845041188,
+ "loss": 1.9556,
+ "step": 673
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019687332309534792,
+ "loss": 2.2209,
+ "step": 674
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019686394787455424,
+ "loss": 1.9853,
+ "step": 675
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019685455884307454,
+ "loss": 2.0877,
+ "step": 676
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019684515600224743,
+ "loss": 2.1607,
+ "step": 677
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019683573935341358,
+ "loss": 2.2664,
+ "step": 678
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019682630889791556,
+ "loss": 1.8527,
+ "step": 679
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.000196816864637098,
+ "loss": 1.8417,
+ "step": 680
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019680740657230738,
+ "loss": 1.9853,
+ "step": 681
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019679793470489228,
+ "loss": 1.8419,
+ "step": 682
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019678844903620317,
+ "loss": 1.9971,
+ "step": 683
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019677894956759246,
+ "loss": 1.9843,
+ "step": 684
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019676943630041462,
+ "loss": 2.376,
+ "step": 685
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675990923602598,
+ "loss": 2.1558,
+ "step": 686
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675036837578494,
+ "loss": 1.5752,
+ "step": 687
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001967408137210518,
+ "loss": 1.6704,
+ "step": 688
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019673124527318881,
+ "loss": 2.1389,
+ "step": 689
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019672166303356028,
+ "loss": 2.126,
+ "step": 690
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019671206700353237,
+ "loss": 1.9402,
+ "step": 691
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019670245718447335,
+ "loss": 1.6701,
+ "step": 692
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019669283357775328,
+ "loss": 1.8134,
+ "step": 693
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001966831961847443,
+ "loss": 2.1642,
+ "step": 694
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019667354500682054,
+ "loss": 1.8455,
+ "step": 695
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.000196663880045358,
+ "loss": 1.9646,
+ "step": 696
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966542013017347,
+ "loss": 1.9855,
+ "step": 697
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019664450877733062,
+ "loss": 1.7029,
+ "step": 698
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019663480247352773,
+ "loss": 1.9789,
+ "step": 699
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966250823917099,
+ "loss": 1.8751,
+ "step": 700
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019661534853326301,
+ "loss": 2.3644,
+ "step": 701
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019660560089957492,
+ "loss": 1.8006,
+ "step": 702
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001965958394920354,
+ "loss": 2.2799,
+ "step": 703
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019658606431203622,
+ "loss": 1.9258,
+ "step": 704
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965762753609711,
+ "loss": 1.9521,
+ "step": 705
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019656647264023575,
+ "loss": 1.9675,
+ "step": 706
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019655665615122783,
+ "loss": 2.3686,
+ "step": 707
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019654682589534693,
+ "loss": 2.1448,
+ "step": 708
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019653698187399466,
+ "loss": 2.2475,
+ "step": 709
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965271240885745,
+ "loss": 1.9417,
+ "step": 710
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965172525404921,
+ "loss": 2.154,
+ "step": 711
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019650736723115475,
+ "loss": 2.0646,
+ "step": 712
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019649746816197196,
+ "loss": 2.235,
+ "step": 713
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019648755533435518,
+ "loss": 1.7122,
+ "step": 714
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019647762874971765,
+ "loss": 2.0635,
+ "step": 715
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019646768840947474,
+ "loss": 1.8904,
+ "step": 716
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019645773431504373,
+ "loss": 1.608,
+ "step": 717
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019644776646784388,
+ "loss": 2.2307,
+ "step": 718
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.0001964377848692963,
+ "loss": 2.176,
+ "step": 719
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019642778952082426,
+ "loss": 2.1984,
+ "step": 720
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001964177804238528,
+ "loss": 2.2625,
+ "step": 721
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019640775757980903,
+ "loss": 2.3142,
+ "step": 722
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019639772099012197,
+ "loss": 2.2366,
+ "step": 723
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019638767065622266,
+ "loss": 1.7823,
+ "step": 724
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.000196377606579544,
+ "loss": 2.0677,
+ "step": 725
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019636752876152095,
+ "loss": 1.3337,
+ "step": 726
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019635743720359037,
+ "loss": 2.055,
+ "step": 727
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001963473319071911,
+ "loss": 1.9888,
+ "step": 728
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019633721287376393,
+ "loss": 1.9258,
+ "step": 729
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019632708010475165,
+ "loss": 2.3768,
+ "step": 730
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001963169336015989,
+ "loss": 1.993,
+ "step": 731
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019630677336575242,
+ "loss": 2.1989,
+ "step": 732
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001962965993986608,
+ "loss": 2.1216,
+ "step": 733
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019628641170177464,
+ "loss": 2.2217,
+ "step": 734
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019627621027654648,
+ "loss": 1.8809,
+ "step": 735
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019626599512443077,
+ "loss": 2.0864,
+ "step": 736
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019625576624688406,
+ "loss": 2.0627,
+ "step": 737
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019624552364536473,
+ "loss": 2.1347,
+ "step": 738
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019623526732133315,
+ "loss": 1.9998,
+ "step": 739
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019622499727625162,
+ "loss": 2.1998,
+ "step": 740
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019621471351158443,
+ "loss": 1.974,
+ "step": 741
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019620441602879787,
+ "loss": 1.9425,
+ "step": 742
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019619410482936008,
+ "loss": 2.6227,
+ "step": 743
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019618377991474124,
+ "loss": 2.1209,
+ "step": 744
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019617344128641345,
+ "loss": 2.0606,
+ "step": 745
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019616308894585078,
+ "loss": 2.296,
+ "step": 746
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019615272289452923,
+ "loss": 2.0415,
+ "step": 747
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961423431339268,
+ "loss": 1.9516,
+ "step": 748
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961319496655234,
+ "loss": 2.0468,
+ "step": 749
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961215424908009,
+ "loss": 1.877,
+ "step": 750
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961111216112432,
+ "loss": 1.8129,
+ "step": 751
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019610068702833596,
+ "loss": 1.9984,
+ "step": 752
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019609023874356707,
+ "loss": 1.9013,
+ "step": 753
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019607977675842615,
+ "loss": 2.0546,
+ "step": 754
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019606930107440485,
+ "loss": 2.2817,
+ "step": 755
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960588116929968,
+ "loss": 2.0578,
+ "step": 756
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019604830861569755,
+ "loss": 2.3521,
+ "step": 757
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019603779184400457,
+ "loss": 2.0392,
+ "step": 758
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960272613794174,
+ "loss": 1.9863,
+ "step": 759
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019601671722343738,
+ "loss": 2.1889,
+ "step": 760
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960061593775679,
+ "loss": 2.0908,
+ "step": 761
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001959955878433143,
+ "loss": 1.986,
+ "step": 762
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019598500262218386,
+ "loss": 2.0339,
+ "step": 763
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019597440371568574,
+ "loss": 2.0958,
+ "step": 764
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.0001959637911253312,
+ "loss": 1.9866,
+ "step": 765
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019595316485263327,
+ "loss": 2.2228,
+ "step": 766
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019594252489910706,
+ "loss": 1.915,
+ "step": 767
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019593187126626965,
+ "loss": 2.0741,
+ "step": 768
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019592120395563994,
+ "loss": 2.5346,
+ "step": 769
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019591052296873888,
+ "loss": 2.4908,
+ "step": 770
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019589982830708937,
+ "loss": 2.1042,
+ "step": 771
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019588911997221625,
+ "loss": 1.8676,
+ "step": 772
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958783979656462,
+ "loss": 1.9152,
+ "step": 773
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019586766228890806,
+ "loss": 1.7784,
+ "step": 774
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958569129435324,
+ "loss": 2.0784,
+ "step": 775
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958461499310519,
+ "loss": 1.7262,
+ "step": 776
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019583537325300118,
+ "loss": 2.4154,
+ "step": 777
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019582458291091663,
+ "loss": 2.3185,
+ "step": 778
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019581377890633684,
+ "loss": 2.0981,
+ "step": 779
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019580296124080212,
+ "loss": 1.8952,
+ "step": 780
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019579212991585493,
+ "loss": 1.7208,
+ "step": 781
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019578128493303955,
+ "loss": 2.0209,
+ "step": 782
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019577042629390217,
+ "loss": 2.1867,
+ "step": 783
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957595539999911,
+ "loss": 2.0805,
+ "step": 784
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019574866805285645,
+ "loss": 2.0451,
+ "step": 785
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019573776845405028,
+ "loss": 2.2056,
+ "step": 786
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957268552051267,
+ "loss": 2.0773,
+ "step": 787
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019571592830764165,
+ "loss": 2.2036,
+ "step": 788
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019570498776315309,
+ "loss": 1.7298,
+ "step": 789
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956940335732209,
+ "loss": 1.8931,
+ "step": 790
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956830657394069,
+ "loss": 2.1567,
+ "step": 791
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019567208426327488,
+ "loss": 1.9471,
+ "step": 792
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019566108914639054,
+ "loss": 1.8916,
+ "step": 793
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019565008039032158,
+ "loss": 2.0111,
+ "step": 794
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019563905799663752,
+ "loss": 2.1374,
+ "step": 795
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019562802196691003,
+ "loss": 2.3083,
+ "step": 796
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019561697230271254,
+ "loss": 2.0381,
+ "step": 797
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001956059090056205,
+ "loss": 2.1909,
+ "step": 798
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019559483207721133,
+ "loss": 1.9893,
+ "step": 799
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955837415190643,
+ "loss": 2.3178,
+ "step": 800
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955726373327607,
+ "loss": 2.0815,
+ "step": 801
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019556151951988376,
+ "loss": 1.6012,
+ "step": 802
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019555038808201865,
+ "loss": 1.4965,
+ "step": 803
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019553924302075242,
+ "loss": 2.3069,
+ "step": 804
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019552808433767415,
+ "loss": 2.2388,
+ "step": 805
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019551691203437482,
+ "loss": 2.5662,
+ "step": 806
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019550572611244738,
+ "loss": 1.9419,
+ "step": 807
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019549452657348663,
+ "loss": 2.3638,
+ "step": 808
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019548331341908947,
+ "loss": 2.1567,
+ "step": 809
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019547208665085457,
+ "loss": 1.9697,
+ "step": 810
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019546084627038268,
+ "loss": 1.9006,
+ "step": 811
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.0001954495922792764,
+ "loss": 2.304,
+ "step": 812
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.0001954383246791403,
+ "loss": 2.0494,
+ "step": 813
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019542704347158093,
+ "loss": 1.8562,
+ "step": 814
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019541574865820672,
+ "loss": 2.1041,
+ "step": 815
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019540444024062804,
+ "loss": 2.22,
+ "step": 816
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019539311822045727,
+ "loss": 1.9925,
+ "step": 817
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019538178259930869,
+ "loss": 2.3213,
+ "step": 818
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019537043337879845,
+ "loss": 2.0319,
+ "step": 819
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019535907056054475,
+ "loss": 1.8578,
+ "step": 820
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019534769414616764,
+ "loss": 1.4115,
+ "step": 821
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953363041372892,
+ "loss": 2.0731,
+ "step": 822
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019532490053553335,
+ "loss": 2.0605,
+ "step": 823
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019531348334252607,
+ "loss": 1.9044,
+ "step": 824
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953020525598951,
+ "loss": 1.7405,
+ "step": 825
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001952906081892703,
+ "loss": 1.898,
+ "step": 826
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019527915023228332,
+ "loss": 1.9696,
+ "step": 827
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019526767869056788,
+ "loss": 2.0469,
+ "step": 828
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019525619356575952,
+ "loss": 2.0307,
+ "step": 829
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019524469485949583,
+ "loss": 2.002,
+ "step": 830
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019523318257341622,
+ "loss": 1.9438,
+ "step": 831
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019522165670916207,
+ "loss": 1.535,
+ "step": 832
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001952101172683768,
+ "loss": 1.7505,
+ "step": 833
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019519856425270562,
+ "loss": 2.2248,
+ "step": 834
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019518699766379576,
+ "loss": 2.0669,
+ "step": 835
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019517541750329635,
+ "loss": 2.0268,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 1.9969017505645752,
+ "eval_runtime": 283.3157,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019516382377285848,
+ "loss": 1.6712,
+ "step": 837
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001951522164741352,
+ "loss": 2.1558,
+ "step": 838
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019514059560878138,
+ "loss": 2.1599,
+ "step": 839
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019512896117845392,
+ "loss": 1.8762,
+ "step": 840
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019511731318481168,
+ "loss": 2.0189,
+ "step": 841
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019510565162951537,
+ "loss": 1.9364,
+ "step": 842
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019509397651422769,
+ "loss": 1.7319,
+ "step": 843
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019508228784061326,
+ "loss": 1.9424,
+ "step": 844
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001950705856103386,
+ "loss": 2.277,
+ "step": 845
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019505886982507225,
+ "loss": 1.6511,
+ "step": 846
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950471404864846,
+ "loss": 1.9056,
+ "step": 847
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019503539759624798,
+ "loss": 1.5105,
+ "step": 848
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950236411560367,
+ "loss": 1.9469,
+ "step": 849
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019501187116752693,
+ "loss": 1.5012,
+ "step": 850
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019500008763239683,
+ "loss": 1.7086,
+ "step": 851
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019498829055232647,
+ "loss": 1.5586,
+ "step": 852
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019497647992899788,
+ "loss": 1.5573,
+ "step": 853
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.000194964655764095,
+ "loss": 2.0757,
+ "step": 854
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019495281805930367,
+ "loss": 1.5478,
+ "step": 855
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019494096681631172,
+ "loss": 1.7068,
+ "step": 856
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019492910203680884,
+ "loss": 1.6759,
+ "step": 857
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001949172237224867,
+ "loss": 1.4621,
+ "step": 858
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019490533187503892,
+ "loss": 1.5359,
+ "step": 859
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.000194893426496161,
+ "loss": 1.9365,
+ "step": 860
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019488150758755035,
+ "loss": 1.7089,
+ "step": 861
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019486957515090641,
+ "loss": 1.4924,
+ "step": 862
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019485762918793046,
+ "loss": 1.387,
+ "step": 863
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.0001948456697003257,
+ "loss": 1.631,
+ "step": 864
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019483369668979732,
+ "loss": 1.7953,
+ "step": 865
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019482171015805245,
+ "loss": 1.7552,
+ "step": 866
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019480971010680002,
+ "loss": 1.8313,
+ "step": 867
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019479769653775106,
+ "loss": 1.593,
+ "step": 868
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019478566945261837,
+ "loss": 1.9506,
+ "step": 869
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019477362885311682,
+ "loss": 1.9598,
+ "step": 870
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947615747409631,
+ "loss": 1.7324,
+ "step": 871
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019474950711787585,
+ "loss": 2.1208,
+ "step": 872
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947374259855757,
+ "loss": 1.4111,
+ "step": 873
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019472533134578507,
+ "loss": 1.6696,
+ "step": 874
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019471322320022849,
+ "loss": 1.6999,
+ "step": 875
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019470110155063225,
+ "loss": 2.1287,
+ "step": 876
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019468896639872468,
+ "loss": 1.874,
+ "step": 877
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019467681774623592,
+ "loss": 1.7149,
+ "step": 878
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019466465559489816,
+ "loss": 1.9563,
+ "step": 879
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019465247994644545,
+ "loss": 1.3504,
+ "step": 880
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019464029080261378,
+ "loss": 1.6176,
+ "step": 881
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019462808816514103,
+ "loss": 1.7577,
+ "step": 882
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019461587203576706,
+ "loss": 1.8054,
+ "step": 883
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019460364241623358,
+ "loss": 2.0246,
+ "step": 884
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019459139930828428,
+ "loss": 1.7645,
+ "step": 885
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945791427136648,
+ "loss": 1.9225,
+ "step": 886
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019456687263412262,
+ "loss": 1.8967,
+ "step": 887
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945545890714072,
+ "loss": 1.5287,
+ "step": 888
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945422920272699,
+ "loss": 1.5033,
+ "step": 889
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019452998150346401,
+ "loss": 2.0148,
+ "step": 890
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945176575017448,
+ "loss": 1.3706,
+ "step": 891
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001945053200238693,
+ "loss": 1.7603,
+ "step": 892
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019449296907159667,
+ "loss": 1.9884,
+ "step": 893
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019448060464668783,
+ "loss": 1.6133,
+ "step": 894
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019446822675090565,
+ "loss": 1.7885,
+ "step": 895
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019445583538601498,
+ "loss": 1.8573,
+ "step": 896
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944434305537826,
+ "loss": 1.7241,
+ "step": 897
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944310122559771,
+ "loss": 1.8942,
+ "step": 898
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944185804943691,
+ "loss": 1.7541,
+ "step": 899
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019440613527073105,
+ "loss": 1.9608,
+ "step": 900
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019439367658683745,
+ "loss": 2.0969,
+ "step": 901
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019438120444446457,
+ "loss": 2.2589,
+ "step": 902
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943687188453907,
+ "loss": 1.7335,
+ "step": 903
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019435621979139596,
+ "loss": 1.8663,
+ "step": 904
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019434370728426252,
+ "loss": 1.5627,
+ "step": 905
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943311813257743,
+ "loss": 1.6101,
+ "step": 906
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019431864191771732,
+ "loss": 1.9661,
+ "step": 907
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943060890618794,
+ "loss": 1.6487,
+ "step": 908
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019429352276005026,
+ "loss": 2.1282,
+ "step": 909
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019428094301402162,
+ "loss": 1.6944,
+ "step": 910
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019426834982558705,
+ "loss": 1.2433,
+ "step": 911
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019425574319654213,
+ "loss": 1.5735,
+ "step": 912
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019424312312868417,
+ "loss": 1.6499,
+ "step": 913
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019423048962381265,
+ "loss": 1.8366,
+ "step": 914
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019421784268372876,
+ "loss": 1.906,
+ "step": 915
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019420518231023568,
+ "loss": 1.5976,
+ "step": 916
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941925085051385,
+ "loss": 1.6722,
+ "step": 917
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019417982127024422,
+ "loss": 1.8832,
+ "step": 918
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019416712060736183,
+ "loss": 1.8865,
+ "step": 919
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019415440651830208,
+ "loss": 1.6627,
+ "step": 920
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941416790048778,
+ "loss": 1.3598,
+ "step": 921
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019412893806890357,
+ "loss": 2.0506,
+ "step": 922
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019411618371219605,
+ "loss": 1.9794,
+ "step": 923
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941034159365737,
+ "loss": 1.7851,
+ "step": 924
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001940906347438569,
+ "loss": 1.8312,
+ "step": 925
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019407784013586804,
+ "loss": 1.5167,
+ "step": 926
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019406503211443128,
+ "loss": 1.5725,
+ "step": 927
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019405221068137277,
+ "loss": 1.8857,
+ "step": 928
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019403937583852061,
+ "loss": 1.741,
+ "step": 929
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019402652758770475,
+ "loss": 1.6748,
+ "step": 930
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019401366593075706,
+ "loss": 1.7285,
+ "step": 931
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019400079086951135,
+ "loss": 1.7545,
+ "step": 932
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019398790240580333,
+ "loss": 1.4491,
+ "step": 933
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019397500054147058,
+ "loss": 1.3359,
+ "step": 934
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019396208527835263,
+ "loss": 1.9567,
+ "step": 935
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.0001939491566182909,
+ "loss": 2.0011,
+ "step": 936
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019393621456312881,
+ "loss": 1.9076,
+ "step": 937
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019392325911471155,
+ "loss": 1.5388,
+ "step": 938
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019391029027488629,
+ "loss": 1.2337,
+ "step": 939
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019389730804550211,
+ "loss": 1.5752,
+ "step": 940
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019388431242840998,
+ "loss": 1.9131,
+ "step": 941
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019387130342546284,
+ "loss": 1.4177,
+ "step": 942
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019385828103851544,
+ "loss": 1.5865,
+ "step": 943
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001938452452694245,
+ "loss": 1.6335,
+ "step": 944
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019383219612004865,
+ "loss": 1.8599,
+ "step": 945
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019381913359224842,
+ "loss": 1.3035,
+ "step": 946
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019380605768788621,
+ "loss": 1.7586,
+ "step": 947
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001937929684088264,
+ "loss": 1.7334,
+ "step": 948
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019377986575693518,
+ "loss": 1.5749,
+ "step": 949
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019376674973408075,
+ "loss": 1.874,
+ "step": 950
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019375362034213314,
+ "loss": 2.3055,
+ "step": 951
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019374047758296433,
+ "loss": 1.5801,
+ "step": 952
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001937273214584482,
+ "loss": 1.8788,
+ "step": 953
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019371415197046052,
+ "loss": 2.431,
+ "step": 954
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019370096912087897,
+ "loss": 1.4963,
+ "step": 955
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001936877729115831,
+ "loss": 1.514,
+ "step": 956
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019367456334445446,
+ "loss": 1.6099,
+ "step": 957
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019366134042137642,
+ "loss": 1.9367,
+ "step": 958
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019364810414423427,
+ "loss": 1.7384,
+ "step": 959
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019363485451491524,
+ "loss": 1.6166,
+ "step": 960
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019362159153530844,
+ "loss": 1.955,
+ "step": 961
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019360831520730482,
+ "loss": 1.4189,
+ "step": 962
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019359502553279736,
+ "loss": 1.4506,
+ "step": 963
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019358172251368087,
+ "loss": 1.7108,
+ "step": 964
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019356840615185203,
+ "loss": 1.6641,
+ "step": 965
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019355507644920952,
+ "loss": 1.7506,
+ "step": 966
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019354173340765382,
+ "loss": 2.0598,
+ "step": 967
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935283770290874,
+ "loss": 1.3494,
+ "step": 968
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019351500731541453,
+ "loss": 1.6571,
+ "step": 969
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935016242685415,
+ "loss": 1.6403,
+ "step": 970
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019348822789037637,
+ "loss": 1.7555,
+ "step": 971
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019347481818282925,
+ "loss": 2.1451,
+ "step": 972
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.000193461395147812,
+ "loss": 1.4522,
+ "step": 973
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934479587872385,
+ "loss": 1.7147,
+ "step": 974
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934345091030245,
+ "loss": 1.3909,
+ "step": 975
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019342104609708756,
+ "loss": 1.8104,
+ "step": 976
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019340756977134728,
+ "loss": 1.5221,
+ "step": 977
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.000193394080127725,
+ "loss": 1.9447,
+ "step": 978
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.0001933805771681442,
+ "loss": 1.5742,
+ "step": 979
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019336706089452996,
+ "loss": 1.5312,
+ "step": 980
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019335353130880948,
+ "loss": 1.4304,
+ "step": 981
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019333998841291177,
+ "loss": 1.8379,
+ "step": 982
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019332643220876773,
+ "loss": 1.877,
+ "step": 983
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001933128626983102,
+ "loss": 1.9627,
+ "step": 984
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932992798834739,
+ "loss": 1.7857,
+ "step": 985
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019328568376619543,
+ "loss": 1.3189,
+ "step": 986
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019327207434841333,
+ "loss": 1.9588,
+ "step": 987
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019325845163206795,
+ "loss": 1.3132,
+ "step": 988
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019324481561910163,
+ "loss": 1.6304,
+ "step": 989
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932311663114586,
+ "loss": 1.8322,
+ "step": 990
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019321750371108486,
+ "loss": 1.4192,
+ "step": 991
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001932038278199285,
+ "loss": 1.3915,
+ "step": 992
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019319013863993933,
+ "loss": 1.8433,
+ "step": 993
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931764361730692,
+ "loss": 2.1459,
+ "step": 994
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931627204212717,
+ "loss": 1.9799,
+ "step": 995
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019314899138650243,
+ "loss": 1.855,
+ "step": 996
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019313524907071887,
+ "loss": 1.4763,
+ "step": 997
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019312149347588037,
+ "loss": 2.0128,
+ "step": 998
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019310772460394814,
+ "loss": 1.6964,
+ "step": 999
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001930939424568854,
+ "loss": 1.5864,
+ "step": 1000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019308014703665712,
+ "loss": 1.8437,
+ "step": 1001
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019306633834523024,
+ "loss": 2.1677,
+ "step": 1002
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019305251638457356,
+ "loss": 1.8872,
+ "step": 1003
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930386811566578,
+ "loss": 1.7312,
+ "step": 1004
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930248326634556,
+ "loss": 1.6772,
+ "step": 1005
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019301097090694143,
+ "loss": 1.9666,
+ "step": 1006
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019299709588909165,
+ "loss": 1.8946,
+ "step": 1007
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019298320761188453,
+ "loss": 2.1784,
+ "step": 1008
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.0001929693060773003,
+ "loss": 2.0249,
+ "step": 1009
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019295539128732093,
+ "loss": 1.717,
+ "step": 1010
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019294146324393046,
+ "loss": 1.8671,
+ "step": 1011
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019292752194911464,
+ "loss": 1.8388,
+ "step": 1012
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019291356740486123,
+ "loss": 1.9111,
+ "step": 1013
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019289959961315986,
+ "loss": 1.5287,
+ "step": 1014
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.000192885618576002,
+ "loss": 1.5669,
+ "step": 1015
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019287162429538105,
+ "loss": 1.9095,
+ "step": 1016
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019285761677329232,
+ "loss": 1.9133,
+ "step": 1017
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019284359601173294,
+ "loss": 2.1099,
+ "step": 1018
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.000192829562012702,
+ "loss": 1.6303,
+ "step": 1019
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019281551477820036,
+ "loss": 1.5907,
+ "step": 1020
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019280145431023097,
+ "loss": 1.4897,
+ "step": 1021
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019278738061079845,
+ "loss": 1.7414,
+ "step": 1022
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019277329368190942,
+ "loss": 1.816,
+ "step": 1023
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019275919352557241,
+ "loss": 1.5033,
+ "step": 1024
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019274508014379777,
+ "loss": 1.7923,
+ "step": 1025
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019273095353859775,
+ "loss": 1.3094,
+ "step": 1026
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019271681371198652,
+ "loss": 1.7689,
+ "step": 1027
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.0001927026606659801,
+ "loss": 1.8019,
+ "step": 1028
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019268849440259639,
+ "loss": 1.8818,
+ "step": 1029
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019267431492385521,
+ "loss": 1.7442,
+ "step": 1030
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019266012223177824,
+ "loss": 2.045,
+ "step": 1031
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019264591632838903,
+ "loss": 1.7842,
+ "step": 1032
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019263169721571308,
+ "loss": 1.5289,
+ "step": 1033
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019261746489577765,
+ "loss": 1.6013,
+ "step": 1034
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019260321937061202,
+ "loss": 1.7912,
+ "step": 1035
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925889606422473,
+ "loss": 1.7573,
+ "step": 1036
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925746887127164,
+ "loss": 1.7368,
+ "step": 1037
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019256040358405424,
+ "loss": 1.7497,
+ "step": 1038
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019254610525829758,
+ "loss": 2.0042,
+ "step": 1039
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019253179373748504,
+ "loss": 2.0732,
+ "step": 1040
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019251746902365708,
+ "loss": 1.8878,
+ "step": 1041
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019250313111885618,
+ "loss": 1.9404,
+ "step": 1042
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019248878002512654,
+ "loss": 1.5535,
+ "step": 1043
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019247441574451432,
+ "loss": 1.9344,
+ "step": 1044
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001924600382790676,
+ "loss": 1.9696,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "eval_loss": 2.064669609069824,
+ "eval_runtime": 283.003,
+ "eval_samples_per_second": 0.728,
+ "eval_steps_per_second": 0.728,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019244564763083624,
+ "loss": 1.4577,
+ "step": 1046
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019243124380187204,
+ "loss": 2.1324,
+ "step": 1047
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019241682679422873,
+ "loss": 1.4713,
+ "step": 1048
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019240239660996177,
+ "loss": 1.7455,
+ "step": 1049
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001923879532511287,
+ "loss": 1.5372,
+ "step": 1050
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019237349671978872,
+ "loss": 2.0984,
+ "step": 1051
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923590270180031,
+ "loss": 1.5023,
+ "step": 1052
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923445441478348,
+ "loss": 2.0826,
+ "step": 1053
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019233004811134886,
+ "loss": 1.7448,
+ "step": 1054
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019231553891061208,
+ "loss": 2.0249,
+ "step": 1055
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019230101654769312,
+ "loss": 1.6144,
+ "step": 1056
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001922864810246626,
+ "loss": 1.9193,
+ "step": 1057
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019227193234359292,
+ "loss": 2.0057,
+ "step": 1058
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019225737050655842,
+ "loss": 1.9493,
+ "step": 1059
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019224279551563532,
+ "loss": 1.9545,
+ "step": 1060
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001922282073729017,
+ "loss": 1.8983,
+ "step": 1061
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019221360608043746,
+ "loss": 1.9414,
+ "step": 1062
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019219899164032447,
+ "loss": 1.8471,
+ "step": 1063
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921843640546464,
+ "loss": 1.7568,
+ "step": 1064
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019216972332548887,
+ "loss": 2.0737,
+ "step": 1065
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921550694549393,
+ "loss": 1.6109,
+ "step": 1066
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.000192140402445087,
+ "loss": 1.6684,
+ "step": 1067
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001921257222980232,
+ "loss": 1.5101,
+ "step": 1068
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019211102901584094,
+ "loss": 1.5262,
+ "step": 1069
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920963226006352,
+ "loss": 1.9757,
+ "step": 1070
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019208160305450272,
+ "loss": 2.038,
+ "step": 1071
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019206687037954224,
+ "loss": 1.4755,
+ "step": 1072
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019205212457785434,
+ "loss": 1.7406,
+ "step": 1073
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019203736565154137,
+ "loss": 1.9564,
+ "step": 1074
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920225936027077,
+ "loss": 1.823,
+ "step": 1075
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001920078084334595,
+ "loss": 1.8275,
+ "step": 1076
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919930101459048,
+ "loss": 1.7106,
+ "step": 1077
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019197819874215347,
+ "loss": 1.5958,
+ "step": 1078
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019196337422431735,
+ "loss": 2.1478,
+ "step": 1079
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919485365945101,
+ "loss": 1.7238,
+ "step": 1080
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019193368585484718,
+ "loss": 2.0758,
+ "step": 1081
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.000191918822007446,
+ "loss": 1.8403,
+ "step": 1082
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019190394505442585,
+ "loss": 1.8286,
+ "step": 1083
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019188905499790789,
+ "loss": 1.6992,
+ "step": 1084
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019187415184001503,
+ "loss": 1.8512,
+ "step": 1085
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918592355828722,
+ "loss": 1.8236,
+ "step": 1086
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918443062286061,
+ "loss": 1.6173,
+ "step": 1087
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019182936377934535,
+ "loss": 1.8593,
+ "step": 1088
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918144082372204,
+ "loss": 1.8184,
+ "step": 1089
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019179943960436358,
+ "loss": 1.9655,
+ "step": 1090
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019178445788290915,
+ "loss": 1.5858,
+ "step": 1091
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019176946307499312,
+ "loss": 1.8359,
+ "step": 1092
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917544551827534,
+ "loss": 1.4354,
+ "step": 1093
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019173943420832984,
+ "loss": 1.4312,
+ "step": 1094
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917244001538641,
+ "loss": 2.0024,
+ "step": 1095
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019170935302149965,
+ "loss": 1.5994,
+ "step": 1096
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019169429281338195,
+ "loss": 2.05,
+ "step": 1097
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019167921953165825,
+ "loss": 1.8746,
+ "step": 1098
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019166413317847763,
+ "loss": 2.0071,
+ "step": 1099
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019164903375599112,
+ "loss": 2.0331,
+ "step": 1100
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019163392126635154,
+ "loss": 1.3587,
+ "step": 1101
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019161879571171362,
+ "loss": 1.6144,
+ "step": 1102
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019160365709423388,
+ "loss": 1.4845,
+ "step": 1103
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019158850541607083,
+ "loss": 1.4511,
+ "step": 1104
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019157334067938474,
+ "loss": 1.8015,
+ "step": 1105
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019155816288633776,
+ "loss": 1.5029,
+ "step": 1106
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019154297203909394,
+ "loss": 1.7102,
+ "step": 1107
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019152776813981912,
+ "loss": 1.6661,
+ "step": 1108
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001915125511906811,
+ "loss": 1.5872,
+ "step": 1109
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019149732119384943,
+ "loss": 1.7868,
+ "step": 1110
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914820781514956,
+ "loss": 1.6365,
+ "step": 1111
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914668220657929,
+ "loss": 2.3434,
+ "step": 1112
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914515529389166,
+ "loss": 1.6458,
+ "step": 1113
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914362707730437,
+ "loss": 1.7061,
+ "step": 1114
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019142097557035308,
+ "loss": 1.8606,
+ "step": 1115
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019140566733302552,
+ "loss": 1.9415,
+ "step": 1116
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019139034606324362,
+ "loss": 1.7411,
+ "step": 1117
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019137501176319193,
+ "loss": 1.9404,
+ "step": 1118
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913596644350567,
+ "loss": 1.802,
+ "step": 1119
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019134430408102615,
+ "loss": 1.2244,
+ "step": 1120
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019132893070329036,
+ "loss": 1.902,
+ "step": 1121
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913135443040412,
+ "loss": 1.4578,
+ "step": 1122
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019129814488547247,
+ "loss": 1.6816,
+ "step": 1123
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001912827324497798,
+ "loss": 1.7293,
+ "step": 1124
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019126730699916061,
+ "loss": 1.6344,
+ "step": 1125
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.0001912518685358143,
+ "loss": 1.6819,
+ "step": 1126
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019123641706194199,
+ "loss": 1.6761,
+ "step": 1127
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019122095257974677,
+ "loss": 1.9222,
+ "step": 1128
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019120547509143354,
+ "loss": 1.6117,
+ "step": 1129
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019118998459920902,
+ "loss": 1.688,
+ "step": 1130
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019117448110528184,
+ "loss": 1.8383,
+ "step": 1131
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019115896461186245,
+ "loss": 1.5225,
+ "step": 1132
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019114343512116318,
+ "loss": 2.0376,
+ "step": 1133
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019112789263539813,
+ "loss": 1.5632,
+ "step": 1134
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019111233715678343,
+ "loss": 1.7049,
+ "step": 1135
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910967686875369,
+ "loss": 1.4992,
+ "step": 1136
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019108118722987826,
+ "loss": 1.7949,
+ "step": 1137
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019106559278602903,
+ "loss": 1.4688,
+ "step": 1138
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019104998535821274,
+ "loss": 1.4031,
+ "step": 1139
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910343649486546,
+ "loss": 2.1757,
+ "step": 1140
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019101873155958179,
+ "loss": 1.622,
+ "step": 1141
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019100308519322322,
+ "loss": 1.9441,
+ "step": 1142
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.0001909874258518098,
+ "loss": 1.8065,
+ "step": 1143
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019097175353757417,
+ "loss": 1.8348,
+ "step": 1144
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019095606825275083,
+ "loss": 2.0519,
+ "step": 1145
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019094036999957624,
+ "loss": 1.9172,
+ "step": 1146
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019092465878028854,
+ "loss": 1.9961,
+ "step": 1147
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019090893459712787,
+ "loss": 2.1239,
+ "step": 1148
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019089319745233611,
+ "loss": 1.3481,
+ "step": 1149
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019087744734815708,
+ "loss": 1.5035,
+ "step": 1150
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019086168428683638,
+ "loss": 1.818,
+ "step": 1151
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019084590827062145,
+ "loss": 2.0481,
+ "step": 1152
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019083011930176165,
+ "loss": 1.4444,
+ "step": 1153
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019081431738250814,
+ "loss": 1.6059,
+ "step": 1154
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907985025151139,
+ "loss": 2.0284,
+ "step": 1155
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907826747018338,
+ "loss": 1.8603,
+ "step": 1156
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019076683394492455,
+ "loss": 1.7189,
+ "step": 1157
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019075098024664468,
+ "loss": 1.7497,
+ "step": 1158
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019073511360925458,
+ "loss": 1.7489,
+ "step": 1159
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001907192340350165,
+ "loss": 1.6059,
+ "step": 1160
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019070334152619453,
+ "loss": 1.4407,
+ "step": 1161
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019068743608505455,
+ "loss": 1.7025,
+ "step": 1162
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019067151771386438,
+ "loss": 1.7921,
+ "step": 1163
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906555864148936,
+ "loss": 1.6147,
+ "step": 1164
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906396421904137,
+ "loss": 1.6192,
+ "step": 1165
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019062368504269795,
+ "loss": 1.4341,
+ "step": 1166
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019060771497402147,
+ "loss": 1.3054,
+ "step": 1167
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001905917319866613,
+ "loss": 2.041,
+ "step": 1168
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019057573608289623,
+ "loss": 2.004,
+ "step": 1169
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019055972726500695,
+ "loss": 1.4002,
+ "step": 1170
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019054370553527595,
+ "loss": 1.5554,
+ "step": 1171
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019052767089598754,
+ "loss": 1.9783,
+ "step": 1172
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.000190511623349428,
+ "loss": 1.7443,
+ "step": 1173
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019049556289788528,
+ "loss": 1.6089,
+ "step": 1174
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001904794895436493,
+ "loss": 1.8784,
+ "step": 1175
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904634032890117,
+ "loss": 2.0985,
+ "step": 1176
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904473041362661,
+ "loss": 1.811,
+ "step": 1177
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019043119208770793,
+ "loss": 1.407,
+ "step": 1178
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904150671456343,
+ "loss": 1.7269,
+ "step": 1179
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019039892931234435,
+ "loss": 1.8374,
+ "step": 1180
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019038277859013896,
+ "loss": 1.583,
+ "step": 1181
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019036661498132086,
+ "loss": 1.6407,
+ "step": 1182
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019035043848819464,
+ "loss": 2.0828,
+ "step": 1183
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019033424911306672,
+ "loss": 1.7067,
+ "step": 1184
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019031804685824534,
+ "loss": 1.55,
+ "step": 1185
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001903018317260406,
+ "loss": 1.7573,
+ "step": 1186
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019028560371876446,
+ "loss": 1.5666,
+ "step": 1187
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001902693628387306,
+ "loss": 1.5192,
+ "step": 1188
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019025310908825466,
+ "loss": 2.0093,
+ "step": 1189
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019023684246965406,
+ "loss": 1.8414,
+ "step": 1190
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019022056298524808,
+ "loss": 1.3696,
+ "step": 1191
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019020427063735782,
+ "loss": 1.6336,
+ "step": 1192
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019018796542830617,
+ "loss": 1.8528,
+ "step": 1193
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019017164736041795,
+ "loss": 2.0523,
+ "step": 1194
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019015531643601973,
+ "loss": 1.7526,
+ "step": 1195
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019013897265743998,
+ "loss": 1.8391,
+ "step": 1196
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019012261602700892,
+ "loss": 1.4257,
+ "step": 1197
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019010624654705867,
+ "loss": 2.0911,
+ "step": 1198
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.0001900898642199232,
+ "loss": 1.7578,
+ "step": 1199
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019007346904793818,
+ "loss": 1.9003,
+ "step": 1200
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900570610334413,
+ "loss": 1.3918,
+ "step": 1201
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900406401787719,
+ "loss": 2.0365,
+ "step": 1202
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019002420648627131,
+ "loss": 1.5184,
+ "step": 1203
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019000775995828254,
+ "loss": 1.6412,
+ "step": 1204
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018999130059715058,
+ "loss": 1.5031,
+ "step": 1205
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018997482840522217,
+ "loss": 1.4421,
+ "step": 1206
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018995834338484584,
+ "loss": 1.9431,
+ "step": 1207
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.000189941845538372,
+ "loss": 1.8141,
+ "step": 1208
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.0001899253348681529,
+ "loss": 1.7289,
+ "step": 1209
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018990881137654258,
+ "loss": 1.7217,
+ "step": 1210
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.000189892275065897,
+ "loss": 2.3727,
+ "step": 1211
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018987572593857381,
+ "loss": 1.4833,
+ "step": 1212
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018985916399693256,
+ "loss": 2.13,
+ "step": 1213
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018984258924333464,
+ "loss": 1.875,
+ "step": 1214
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018982600168014323,
+ "loss": 1.783,
+ "step": 1215
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018980940130972337,
+ "loss": 1.6815,
+ "step": 1216
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897927881344419,
+ "loss": 2.049,
+ "step": 1217
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018977616215666752,
+ "loss": 1.918,
+ "step": 1218
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897595233787707,
+ "loss": 1.5824,
+ "step": 1219
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018974287180312377,
+ "loss": 1.7473,
+ "step": 1220
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018972620743210093,
+ "loss": 1.6915,
+ "step": 1221
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897095302680781,
+ "loss": 1.7633,
+ "step": 1222
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018969284031343308,
+ "loss": 1.6921,
+ "step": 1223
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018967613757054554,
+ "loss": 1.5433,
+ "step": 1224
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018965942204179686,
+ "loss": 1.9389,
+ "step": 1225
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018964269372957038,
+ "loss": 1.5625,
+ "step": 1226
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018962595263625115,
+ "loss": 1.4835,
+ "step": 1227
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018960919876422611,
+ "loss": 1.8479,
+ "step": 1228
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018959243211588397,
+ "loss": 1.7861,
+ "step": 1229
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018957565269361531,
+ "loss": 1.867,
+ "step": 1230
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018955886049981245,
+ "loss": 1.9383,
+ "step": 1231
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.0001895420555368697,
+ "loss": 1.755,
+ "step": 1232
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.000189525237807183,
+ "loss": 1.5166,
+ "step": 1233
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018950840731315024,
+ "loss": 1.8629,
+ "step": 1234
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.000189491564057171,
+ "loss": 1.6845,
+ "step": 1235
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018947470804164685,
+ "loss": 1.4748,
+ "step": 1236
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018945783926898105,
+ "loss": 1.8907,
+ "step": 1237
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018944095774157873,
+ "loss": 1.5758,
+ "step": 1238
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018942406346184683,
+ "loss": 1.6367,
+ "step": 1239
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018940715643219407,
+ "loss": 1.7285,
+ "step": 1240
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018939023665503108,
+ "loss": 1.5714,
+ "step": 1241
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001893733041327702,
+ "loss": 1.9308,
+ "step": 1242
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018935635886782568,
+ "loss": 1.9153,
+ "step": 1243
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018933940086261351,
+ "loss": 1.8009,
+ "step": 1244
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018932243011955154,
+ "loss": 1.7392,
+ "step": 1245
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018930544664105944,
+ "loss": 1.821,
+ "step": 1246
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001892884504295587,
+ "loss": 1.475,
+ "step": 1247
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018927144148747255,
+ "loss": 1.8937,
+ "step": 1248
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018925441981722618,
+ "loss": 1.6958,
+ "step": 1249
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018923738542124644,
+ "loss": 1.6836,
+ "step": 1250
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018922033830196208,
+ "loss": 2.0213,
+ "step": 1251
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018920327846180365,
+ "loss": 1.9572,
+ "step": 1252
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018918620590320352,
+ "loss": 1.9449,
+ "step": 1253
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018916912062859583,
+ "loss": 1.7297,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "eval_loss": 2.0551259517669678,
+ "eval_runtime": 283.8338,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018915202264041664,
+ "loss": 1.8158,
+ "step": 1255
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891349119411037,
+ "loss": 1.921,
+ "step": 1256
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018911778853309658,
+ "loss": 1.5726,
+ "step": 1257
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891006524188368,
+ "loss": 1.6641,
+ "step": 1258
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018908350360076752,
+ "loss": 1.5841,
+ "step": 1259
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018906634208133385,
+ "loss": 1.8567,
+ "step": 1260
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018904916786298257,
+ "loss": 1.5584,
+ "step": 1261
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018903198094816242,
+ "loss": 1.6615,
+ "step": 1262
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018901478133932385,
+ "loss": 1.7477,
+ "step": 1263
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018899756903891914,
+ "loss": 1.3796,
+ "step": 1264
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018898034404940238,
+ "loss": 1.7991,
+ "step": 1265
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018896310637322953,
+ "loss": 1.4944,
+ "step": 1266
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018894585601285827,
+ "loss": 1.5719,
+ "step": 1267
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018892859297074812,
+ "loss": 1.5495,
+ "step": 1268
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018891131724936043,
+ "loss": 1.7611,
+ "step": 1269
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018889402885115833,
+ "loss": 1.5991,
+ "step": 1270
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018887672777860676,
+ "loss": 1.8849,
+ "step": 1271
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888594140341725,
+ "loss": 1.6136,
+ "step": 1272
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888420876203241,
+ "loss": 1.8288,
+ "step": 1273
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888247485395319,
+ "loss": 1.6625,
+ "step": 1274
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018880739679426816,
+ "loss": 1.49,
+ "step": 1275
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018879003238700675,
+ "loss": 1.874,
+ "step": 1276
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018877265532022352,
+ "loss": 1.751,
+ "step": 1277
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018875526559639604,
+ "loss": 1.9882,
+ "step": 1278
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018873786321800374,
+ "loss": 1.5214,
+ "step": 1279
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001887204481875278,
+ "loss": 1.741,
+ "step": 1280
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018870302050745118,
+ "loss": 1.7798,
+ "step": 1281
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018868558018025878,
+ "loss": 1.9258,
+ "step": 1282
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001886681272084371,
+ "loss": 1.9096,
+ "step": 1283
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018865066159447466,
+ "loss": 1.6729,
+ "step": 1284
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018863318334086157,
+ "loss": 1.6239,
+ "step": 1285
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018861569245008994,
+ "loss": 1.9857,
+ "step": 1286
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018859818892465354,
+ "loss": 1.9905,
+ "step": 1287
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188580672767048,
+ "loss": 2.0073,
+ "step": 1288
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018856314397977075,
+ "loss": 1.7109,
+ "step": 1289
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188545602565321,
+ "loss": 1.3727,
+ "step": 1290
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018852804852619975,
+ "loss": 1.7045,
+ "step": 1291
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018851048186490992,
+ "loss": 1.9042,
+ "step": 1292
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018849290258395602,
+ "loss": 1.7174,
+ "step": 1293
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018847531068584452,
+ "loss": 1.6502,
+ "step": 1294
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018845770617308366,
+ "loss": 1.8582,
+ "step": 1295
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001884400890481834,
+ "loss": 1.4846,
+ "step": 1296
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018842245931365562,
+ "loss": 1.5428,
+ "step": 1297
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018840481697201392,
+ "loss": 1.7266,
+ "step": 1298
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001883871620257737,
+ "loss": 1.9324,
+ "step": 1299
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018836949447745215,
+ "loss": 1.577,
+ "step": 1300
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001883518143295683,
+ "loss": 1.6388,
+ "step": 1301
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018833412158464298,
+ "loss": 1.9201,
+ "step": 1302
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018831641624519877,
+ "loss": 1.6478,
+ "step": 1303
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018829869831376005,
+ "loss": 1.6826,
+ "step": 1304
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018828096779285303,
+ "loss": 1.8513,
+ "step": 1305
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018826322468500566,
+ "loss": 1.571,
+ "step": 1306
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018824546899274777,
+ "loss": 1.1602,
+ "step": 1307
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001882277007186109,
+ "loss": 1.9998,
+ "step": 1308
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001882099198651284,
+ "loss": 1.7034,
+ "step": 1309
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001881921264348355,
+ "loss": 1.4031,
+ "step": 1310
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018817432043026911,
+ "loss": 1.8413,
+ "step": 1311
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018815650185396797,
+ "loss": 1.6606,
+ "step": 1312
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018813867070847264,
+ "loss": 1.5792,
+ "step": 1313
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018812082699632546,
+ "loss": 1.4525,
+ "step": 1314
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018810297072007054,
+ "loss": 1.4906,
+ "step": 1315
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018808510188225377,
+ "loss": 1.6284,
+ "step": 1316
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880672204854229,
+ "loss": 1.7281,
+ "step": 1317
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880493265321274,
+ "loss": 1.5345,
+ "step": 1318
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018803142002491856,
+ "loss": 2.0933,
+ "step": 1319
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018801350096634946,
+ "loss": 1.9372,
+ "step": 1320
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.000187995569358975,
+ "loss": 1.7151,
+ "step": 1321
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018797762520535177,
+ "loss": 1.4823,
+ "step": 1322
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001879596685080383,
+ "loss": 2.0495,
+ "step": 1323
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018794169926959474,
+ "loss": 2.2966,
+ "step": 1324
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018792371749258314,
+ "loss": 1.7868,
+ "step": 1325
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018790572317956735,
+ "loss": 1.9403,
+ "step": 1326
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018788771633311292,
+ "loss": 1.6687,
+ "step": 1327
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018786969695578723,
+ "loss": 1.8422,
+ "step": 1328
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018785166505015948,
+ "loss": 1.5916,
+ "step": 1329
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018783362061880062,
+ "loss": 1.9119,
+ "step": 1330
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018781556366428336,
+ "loss": 1.4903,
+ "step": 1331
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018779749418918227,
+ "loss": 1.9497,
+ "step": 1332
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018777941219607364,
+ "loss": 1.9462,
+ "step": 1333
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018776131768753556,
+ "loss": 2.0474,
+ "step": 1334
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018774321066614795,
+ "loss": 1.4474,
+ "step": 1335
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018772509113449245,
+ "loss": 1.8315,
+ "step": 1336
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018770695909515247,
+ "loss": 1.7684,
+ "step": 1337
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018768881455071332,
+ "loss": 1.2675,
+ "step": 1338
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.000187670657503762,
+ "loss": 1.8226,
+ "step": 1339
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018765248795688726,
+ "loss": 2.2112,
+ "step": 1340
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.0001876343059126797,
+ "loss": 1.3627,
+ "step": 1341
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018761611137373173,
+ "loss": 2.1488,
+ "step": 1342
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018759790434263744,
+ "loss": 1.9842,
+ "step": 1343
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018757968482199276,
+ "loss": 1.9775,
+ "step": 1344
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018756145281439545,
+ "loss": 1.6835,
+ "step": 1345
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875432083224449,
+ "loss": 1.5272,
+ "step": 1346
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875249513487425,
+ "loss": 1.7539,
+ "step": 1347
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018750668189589117,
+ "loss": 1.874,
+ "step": 1348
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018748839996649583,
+ "loss": 1.5858,
+ "step": 1349
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018747010556316305,
+ "loss": 1.9298,
+ "step": 1350
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001874517986885012,
+ "loss": 1.5079,
+ "step": 1351
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018743347934512046,
+ "loss": 1.884,
+ "step": 1352
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018741514753563277,
+ "loss": 1.7978,
+ "step": 1353
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873968032626518,
+ "loss": 1.7735,
+ "step": 1354
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018737844652879312,
+ "loss": 1.7227,
+ "step": 1355
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018736007733667393,
+ "loss": 1.8458,
+ "step": 1356
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018734169568891334,
+ "loss": 1.3268,
+ "step": 1357
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873233015881321,
+ "loss": 1.3782,
+ "step": 1358
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018730489503695287,
+ "loss": 1.9614,
+ "step": 1359
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018728647603800003,
+ "loss": 1.7755,
+ "step": 1360
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018726804459389963,
+ "loss": 1.7961,
+ "step": 1361
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018724960070727972,
+ "loss": 1.7158,
+ "step": 1362
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872311443807699,
+ "loss": 1.6303,
+ "step": 1363
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872126756170017,
+ "loss": 1.8734,
+ "step": 1364
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018719419441860834,
+ "loss": 1.5143,
+ "step": 1365
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001871757007882248,
+ "loss": 1.498,
+ "step": 1366
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001871571947284879,
+ "loss": 1.0886,
+ "step": 1367
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018713867624203621,
+ "loss": 1.6633,
+ "step": 1368
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018712014533151008,
+ "loss": 1.8895,
+ "step": 1369
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018710160199955156,
+ "loss": 1.4178,
+ "step": 1370
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018708304624880456,
+ "loss": 1.6814,
+ "step": 1371
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001870644780819147,
+ "loss": 1.8671,
+ "step": 1372
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018704589750152944,
+ "loss": 1.4786,
+ "step": 1373
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018702730451029796,
+ "loss": 1.8622,
+ "step": 1374
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018700869911087115,
+ "loss": 1.8891,
+ "step": 1375
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869900813059018,
+ "loss": 2.0493,
+ "step": 1376
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018697145109804436,
+ "loss": 1.7238,
+ "step": 1377
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018695280848995513,
+ "loss": 1.7826,
+ "step": 1378
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869341534842921,
+ "loss": 1.8557,
+ "step": 1379
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869154860837151,
+ "loss": 1.7492,
+ "step": 1380
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868968062908857,
+ "loss": 1.7441,
+ "step": 1381
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868781141084672,
+ "loss": 1.8322,
+ "step": 1382
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868594095391247,
+ "loss": 1.8177,
+ "step": 1383
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018684069258552508,
+ "loss": 2.0001,
+ "step": 1384
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018682196325033696,
+ "loss": 1.5046,
+ "step": 1385
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018680322153623075,
+ "loss": 1.6789,
+ "step": 1386
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867844674458786,
+ "loss": 1.6951,
+ "step": 1387
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018676570098195443,
+ "loss": 2.0334,
+ "step": 1388
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018674692214713388,
+ "loss": 1.7833,
+ "step": 1389
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867281309440945,
+ "loss": 1.82,
+ "step": 1390
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018670932737551547,
+ "loss": 1.8155,
+ "step": 1391
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018669051144407775,
+ "loss": 1.7912,
+ "step": 1392
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018667168315246406,
+ "loss": 1.5816,
+ "step": 1393
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018665284250335895,
+ "loss": 1.7521,
+ "step": 1394
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018663398949944865,
+ "loss": 1.4287,
+ "step": 1395
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018661512414342127,
+ "loss": 1.6026,
+ "step": 1396
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018659624643796647,
+ "loss": 1.6953,
+ "step": 1397
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018657735638577587,
+ "loss": 1.8515,
+ "step": 1398
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018655845398954276,
+ "loss": 2.0384,
+ "step": 1399
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018653953925196225,
+ "loss": 1.5458,
+ "step": 1400
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018652061217573114,
+ "loss": 1.7166,
+ "step": 1401
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.000186501672763548,
+ "loss": 1.5653,
+ "step": 1402
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018648272101811318,
+ "loss": 2.0928,
+ "step": 1403
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018646375694212884,
+ "loss": 1.605,
+ "step": 1404
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018644478053829878,
+ "loss": 1.4734,
+ "step": 1405
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018642579180932865,
+ "loss": 2.0578,
+ "step": 1406
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018640679075792582,
+ "loss": 1.9823,
+ "step": 1407
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018638777738679943,
+ "loss": 2.0551,
+ "step": 1408
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018636875169866036,
+ "loss": 1.6315,
+ "step": 1409
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863497136962213,
+ "loss": 1.8965,
+ "step": 1410
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863306633821966,
+ "loss": 1.3584,
+ "step": 1411
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018631160075930245,
+ "loss": 1.9673,
+ "step": 1412
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018629252583025676,
+ "loss": 1.5277,
+ "step": 1413
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001862734385977792,
+ "loss": 1.6788,
+ "step": 1414
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018625433906459116,
+ "loss": 1.432,
+ "step": 1415
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018623522723341588,
+ "loss": 1.8102,
+ "step": 1416
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018621610310697823,
+ "loss": 1.6713,
+ "step": 1417
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018619696668800492,
+ "loss": 1.6989,
+ "step": 1418
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001861778179792244,
+ "loss": 1.7645,
+ "step": 1419
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018615865698336684,
+ "loss": 1.594,
+ "step": 1420
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018613948370316415,
+ "loss": 1.8751,
+ "step": 1421
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018612029814135014,
+ "loss": 1.64,
+ "step": 1422
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018610110030066007,
+ "loss": 1.5066,
+ "step": 1423
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001860818901838313,
+ "loss": 1.9817,
+ "step": 1424
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018606266779360266,
+ "loss": 2.056,
+ "step": 1425
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860434331327149,
+ "loss": 1.6997,
+ "step": 1426
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018602418620391044,
+ "loss": 1.5573,
+ "step": 1427
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860049270099335,
+ "loss": 1.8427,
+ "step": 1428
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018598565555353,
+ "loss": 2.012,
+ "step": 1429
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018596637183744763,
+ "loss": 1.7976,
+ "step": 1430
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018594707586443585,
+ "loss": 1.4,
+ "step": 1431
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001859277676372458,
+ "loss": 1.8717,
+ "step": 1432
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018590844715863045,
+ "loss": 1.4311,
+ "step": 1433
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018588911443134448,
+ "loss": 1.5903,
+ "step": 1434
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018586976945814425,
+ "loss": 2.0898,
+ "step": 1435
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018585041224178803,
+ "loss": 1.5302,
+ "step": 1436
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018583104278503568,
+ "loss": 1.9582,
+ "step": 1437
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018581166109064886,
+ "loss": 1.5264,
+ "step": 1438
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018579226716139096,
+ "loss": 1.6551,
+ "step": 1439
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018577286100002723,
+ "loss": 1.7774,
+ "step": 1440
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018575344260932444,
+ "loss": 1.8316,
+ "step": 1441
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001857340119920513,
+ "loss": 1.3916,
+ "step": 1442
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018571456915097818,
+ "loss": 1.6728,
+ "step": 1443
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856951140888772,
+ "loss": 1.7247,
+ "step": 1444
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018567564680852224,
+ "loss": 1.4539,
+ "step": 1445
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018565616731268888,
+ "loss": 1.613,
+ "step": 1446
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856366756041545,
+ "loss": 1.757,
+ "step": 1447
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018561717168569816,
+ "loss": 1.6903,
+ "step": 1448
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018559765556010072,
+ "loss": 1.7322,
+ "step": 1449
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018557812723014476,
+ "loss": 1.5627,
+ "step": 1450
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018555858669861458,
+ "loss": 1.8751,
+ "step": 1451
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018553903396829625,
+ "loss": 1.2721,
+ "step": 1452
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018551946904197752,
+ "loss": 1.8167,
+ "step": 1453
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018549989192244797,
+ "loss": 1.6602,
+ "step": 1454
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018548030261249885,
+ "loss": 1.9053,
+ "step": 1455
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018546070111492315,
+ "loss": 1.7721,
+ "step": 1456
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018544108743251566,
+ "loss": 2.1421,
+ "step": 1457
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018542146156807284,
+ "loss": 1.5076,
+ "step": 1458
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018540182352439288,
+ "loss": 1.9039,
+ "step": 1459
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018538217330427582,
+ "loss": 1.9777,
+ "step": 1460
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018536251091052323,
+ "loss": 1.5702,
+ "step": 1461
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018534283634593862,
+ "loss": 1.851,
+ "step": 1462
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018532314961332717,
+ "loss": 1.5337,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "eval_loss": 2.068387508392334,
+ "eval_runtime": 283.4638,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018530345071549574,
+ "loss": 1.7553,
+ "step": 1464
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018528373965525296,
+ "loss": 1.4175,
+ "step": 1465
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018526401643540922,
+ "loss": 1.7216,
+ "step": 1466
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018524428105877664,
+ "loss": 1.6415,
+ "step": 1467
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018522453352816896,
+ "loss": 1.7284,
+ "step": 1468
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018520477384640187,
+ "loss": 1.8314,
+ "step": 1469
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018518500201629258,
+ "loss": 1.8341,
+ "step": 1470
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018516521804066015,
+ "loss": 1.4129,
+ "step": 1471
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018514542192232537,
+ "loss": 1.4671,
+ "step": 1472
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018512561366411067,
+ "loss": 1.6665,
+ "step": 1473
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018510579326884034,
+ "loss": 1.5722,
+ "step": 1474
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850859607393403,
+ "loss": 1.9348,
+ "step": 1475
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850661160784383,
+ "loss": 1.5404,
+ "step": 1476
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018504625928896363,
+ "loss": 1.4769,
+ "step": 1477
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018502639037374757,
+ "loss": 1.4149,
+ "step": 1478
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850065093356229,
+ "loss": 1.958,
+ "step": 1479
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018498661617742426,
+ "loss": 1.8319,
+ "step": 1480
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018496671090198797,
+ "loss": 1.5948,
+ "step": 1481
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001849467935121521,
+ "loss": 1.8469,
+ "step": 1482
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018492686401075644,
+ "loss": 1.6798,
+ "step": 1483
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001849069224006425,
+ "loss": 1.8197,
+ "step": 1484
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848869686846535,
+ "loss": 1.6613,
+ "step": 1485
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848670028656344,
+ "loss": 1.7322,
+ "step": 1486
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018484702494643188,
+ "loss": 2.0493,
+ "step": 1487
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018482703492989444,
+ "loss": 1.7182,
+ "step": 1488
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018480703281887215,
+ "loss": 1.689,
+ "step": 1489
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018478701861621686,
+ "loss": 1.9477,
+ "step": 1490
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001847669923247822,
+ "loss": 1.8171,
+ "step": 1491
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018474695394742345,
+ "loss": 1.7337,
+ "step": 1492
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847269034869977,
+ "loss": 1.6983,
+ "step": 1493
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847068409463636,
+ "loss": 1.6445,
+ "step": 1494
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846867663283818,
+ "loss": 1.9965,
+ "step": 1495
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846666796359143,
+ "loss": 1.6775,
+ "step": 1496
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846465808718252,
+ "loss": 1.8117,
+ "step": 1497
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018462647003898006,
+ "loss": 1.8803,
+ "step": 1498
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018460634714024624,
+ "loss": 1.3045,
+ "step": 1499
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018458621217849286,
+ "loss": 1.7768,
+ "step": 1500
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018456606515659073,
+ "loss": 2.0641,
+ "step": 1501
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001845459060774123,
+ "loss": 1.3804,
+ "step": 1502
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018452573494383192,
+ "loss": 1.6271,
+ "step": 1503
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018450555175872547,
+ "loss": 1.8525,
+ "step": 1504
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018448535652497073,
+ "loss": 1.5303,
+ "step": 1505
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.000184465149245447,
+ "loss": 2.0368,
+ "step": 1506
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018444492992303544,
+ "loss": 1.9951,
+ "step": 1507
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001844246985606189,
+ "loss": 1.8715,
+ "step": 1508
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018440445516108186,
+ "loss": 1.7373,
+ "step": 1509
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018438419972731067,
+ "loss": 1.7667,
+ "step": 1510
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018436393226219327,
+ "loss": 1.5134,
+ "step": 1511
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018434365276861938,
+ "loss": 1.3891,
+ "step": 1512
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001843233612494804,
+ "loss": 1.7066,
+ "step": 1513
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018430305770766948,
+ "loss": 1.6366,
+ "step": 1514
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842827421460814,
+ "loss": 1.7838,
+ "step": 1515
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842624145676128,
+ "loss": 1.7884,
+ "step": 1516
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001842420749751619,
+ "loss": 1.8428,
+ "step": 1517
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018422172337162867,
+ "loss": 1.4987,
+ "step": 1518
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018420135975991483,
+ "loss": 1.7576,
+ "step": 1519
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001841809841429238,
+ "loss": 1.8522,
+ "step": 1520
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018416059652356066,
+ "loss": 1.9308,
+ "step": 1521
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018414019690473227,
+ "loss": 1.4658,
+ "step": 1522
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018411978528934717,
+ "loss": 1.7072,
+ "step": 1523
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001840993616803156,
+ "loss": 1.736,
+ "step": 1524
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840789260805495,
+ "loss": 1.7712,
+ "step": 1525
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840584784929626,
+ "loss": 1.2231,
+ "step": 1526
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018403801892047023,
+ "loss": 1.8421,
+ "step": 1527
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018401754736598947,
+ "loss": 1.2689,
+ "step": 1528
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018399706383243918,
+ "loss": 1.8062,
+ "step": 1529
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839765683227398,
+ "loss": 1.6846,
+ "step": 1530
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839560608398136,
+ "loss": 1.8201,
+ "step": 1531
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018393554138658441,
+ "loss": 1.6958,
+ "step": 1532
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018391500996597796,
+ "loss": 1.8487,
+ "step": 1533
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.0001838944665809215,
+ "loss": 1.9788,
+ "step": 1534
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018387391123434412,
+ "loss": 1.6002,
+ "step": 1535
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018385334392917658,
+ "loss": 1.3859,
+ "step": 1536
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018383276466835127,
+ "loss": 2.0743,
+ "step": 1537
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018381217345480235,
+ "loss": 1.8357,
+ "step": 1538
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018379157029146573,
+ "loss": 1.7002,
+ "step": 1539
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018377095518127897,
+ "loss": 1.3058,
+ "step": 1540
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018375032812718124,
+ "loss": 1.8745,
+ "step": 1541
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018372968913211364,
+ "loss": 1.7847,
+ "step": 1542
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018370903819901874,
+ "loss": 1.8156,
+ "step": 1543
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018368837533084095,
+ "loss": 2.0152,
+ "step": 1544
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018366770053052634,
+ "loss": 1.5656,
+ "step": 1545
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018364701380102266,
+ "loss": 1.5753,
+ "step": 1546
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018362631514527947,
+ "loss": 1.3938,
+ "step": 1547
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018360560456624788,
+ "loss": 1.9599,
+ "step": 1548
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018358488206688075,
+ "loss": 1.8641,
+ "step": 1549
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018356414765013267,
+ "loss": 1.8428,
+ "step": 1550
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018354340131895998,
+ "loss": 1.6016,
+ "step": 1551
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018352264307632056,
+ "loss": 1.5768,
+ "step": 1552
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018350187292517415,
+ "loss": 1.5369,
+ "step": 1553
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.0001834810908684821,
+ "loss": 1.9717,
+ "step": 1554
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018346029690920746,
+ "loss": 1.943,
+ "step": 1555
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018343949105031505,
+ "loss": 1.8166,
+ "step": 1556
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018341867329477125,
+ "loss": 1.7149,
+ "step": 1557
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018339784364554426,
+ "loss": 1.4657,
+ "step": 1558
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018337700210560397,
+ "loss": 1.8693,
+ "step": 1559
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018335614867792183,
+ "loss": 1.7656,
+ "step": 1560
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001833352833654712,
+ "loss": 1.5123,
+ "step": 1561
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018331440617122696,
+ "loss": 1.7884,
+ "step": 1562
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832935170981657,
+ "loss": 1.7309,
+ "step": 1563
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018327261614926583,
+ "loss": 1.9628,
+ "step": 1564
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018325170332750732,
+ "loss": 1.6409,
+ "step": 1565
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832307786358719,
+ "loss": 1.6093,
+ "step": 1566
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018320984207734298,
+ "loss": 1.6111,
+ "step": 1567
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018318889365490565,
+ "loss": 2.0085,
+ "step": 1568
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018316793337154664,
+ "loss": 2.079,
+ "step": 1569
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018314696123025454,
+ "loss": 1.5466,
+ "step": 1570
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018312597723401942,
+ "loss": 2.0825,
+ "step": 1571
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.0001831049813858332,
+ "loss": 1.9748,
+ "step": 1572
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018308397368868945,
+ "loss": 1.6529,
+ "step": 1573
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018306295414558335,
+ "loss": 1.7119,
+ "step": 1574
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018304192275951184,
+ "loss": 1.8812,
+ "step": 1575
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018302087953347352,
+ "loss": 1.8676,
+ "step": 1576
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018299982447046877,
+ "loss": 1.879,
+ "step": 1577
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018297875757349952,
+ "loss": 1.6282,
+ "step": 1578
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018295767884556947,
+ "loss": 1.735,
+ "step": 1579
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018293658828968397,
+ "loss": 1.5796,
+ "step": 1580
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018291548590885007,
+ "loss": 1.8258,
+ "step": 1581
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018289437170607658,
+ "loss": 1.7531,
+ "step": 1582
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018287324568437381,
+ "loss": 1.6265,
+ "step": 1583
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018285210784675394,
+ "loss": 1.7997,
+ "step": 1584
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018283095819623078,
+ "loss": 1.955,
+ "step": 1585
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018280979673581977,
+ "loss": 1.6542,
+ "step": 1586
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018278862346853808,
+ "loss": 1.7634,
+ "step": 1587
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018276743839740458,
+ "loss": 2.0077,
+ "step": 1588
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018274624152543977,
+ "loss": 2.0254,
+ "step": 1589
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018272503285566587,
+ "loss": 1.4464,
+ "step": 1590
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018270381239110677,
+ "loss": 1.8643,
+ "step": 1591
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018268258013478804,
+ "loss": 1.3278,
+ "step": 1592
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018266133608973696,
+ "loss": 1.744,
+ "step": 1593
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018264008025898248,
+ "loss": 1.5079,
+ "step": 1594
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018261881264555516,
+ "loss": 1.9655,
+ "step": 1595
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.0001825975332524873,
+ "loss": 2.0557,
+ "step": 1596
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.000182576242082813,
+ "loss": 1.7174,
+ "step": 1597
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018255493913956774,
+ "loss": 1.449,
+ "step": 1598
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018253362442578896,
+ "loss": 1.9058,
+ "step": 1599
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018251229794451567,
+ "loss": 1.3482,
+ "step": 1600
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018249095969878853,
+ "loss": 1.7906,
+ "step": 1601
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018246960969164994,
+ "loss": 1.6177,
+ "step": 1602
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018244824792614393,
+ "loss": 1.5786,
+ "step": 1603
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018242687440531618,
+ "loss": 1.6451,
+ "step": 1604
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018240548913221416,
+ "loss": 1.3695,
+ "step": 1605
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.0001823840921098869,
+ "loss": 1.6648,
+ "step": 1606
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018236268334138515,
+ "loss": 2.1548,
+ "step": 1607
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018234126282976133,
+ "loss": 1.6153,
+ "step": 1608
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001823198305780696,
+ "loss": 1.741,
+ "step": 1609
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018229838658936564,
+ "loss": 1.7827,
+ "step": 1610
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018227693086670697,
+ "loss": 1.7343,
+ "step": 1611
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018225546341315261,
+ "loss": 1.8149,
+ "step": 1612
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001822339842317635,
+ "loss": 1.5497,
+ "step": 1613
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018221249332560198,
+ "loss": 1.7659,
+ "step": 1614
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001821909906977322,
+ "loss": 1.8992,
+ "step": 1615
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018216947635122,
+ "loss": 1.8682,
+ "step": 1616
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018214795028913288,
+ "loss": 1.9774,
+ "step": 1617
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001821264125145399,
+ "loss": 1.9441,
+ "step": 1618
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018210486303051195,
+ "loss": 2.0314,
+ "step": 1619
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001820833018401215,
+ "loss": 1.8234,
+ "step": 1620
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018206172894644272,
+ "loss": 1.9478,
+ "step": 1621
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018204014435255135,
+ "loss": 1.7894,
+ "step": 1622
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.000182018548061525,
+ "loss": 1.5469,
+ "step": 1623
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018199694007644277,
+ "loss": 1.9419,
+ "step": 1624
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018197532040038547,
+ "loss": 1.6686,
+ "step": 1625
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018195368903643563,
+ "loss": 2.2525,
+ "step": 1626
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018193204598767744,
+ "loss": 1.8076,
+ "step": 1627
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018191039125719662,
+ "loss": 1.976,
+ "step": 1628
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018188872484808076,
+ "loss": 1.6896,
+ "step": 1629
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018186704676341898,
+ "loss": 1.6784,
+ "step": 1630
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018184535700630213,
+ "loss": 1.9634,
+ "step": 1631
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018182365557982264,
+ "loss": 1.7406,
+ "step": 1632
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018180194248707473,
+ "loss": 1.7492,
+ "step": 1633
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018178021773115414,
+ "loss": 1.7731,
+ "step": 1634
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018175848131515837,
+ "loss": 1.6232,
+ "step": 1635
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817367332421866,
+ "loss": 1.7488,
+ "step": 1636
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817149735153396,
+ "loss": 1.3398,
+ "step": 1637
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018169320213771983,
+ "loss": 1.4521,
+ "step": 1638
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018167141911243145,
+ "loss": 1.6311,
+ "step": 1639
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018164962444258014,
+ "loss": 1.8911,
+ "step": 1640
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018162781813127346,
+ "loss": 1.9879,
+ "step": 1641
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001816060001816205,
+ "loss": 1.5637,
+ "step": 1642
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018158417059673196,
+ "loss": 1.7461,
+ "step": 1643
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815623293797203,
+ "loss": 1.6671,
+ "step": 1644
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815404765336996,
+ "loss": 1.2124,
+ "step": 1645
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815186120617856,
+ "loss": 1.6402,
+ "step": 1646
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001814967359670957,
+ "loss": 1.8837,
+ "step": 1647
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018147484825274893,
+ "loss": 1.8027,
+ "step": 1648
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018145294892186605,
+ "loss": 1.7684,
+ "step": 1649
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.0001814310379775694,
+ "loss": 1.8274,
+ "step": 1650
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.000181409115422983,
+ "loss": 1.8292,
+ "step": 1651
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018138718126123248,
+ "loss": 1.3492,
+ "step": 1652
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018136523549544523,
+ "loss": 1.509,
+ "step": 1653
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018134327812875024,
+ "loss": 1.7415,
+ "step": 1654
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018132130916427816,
+ "loss": 1.5223,
+ "step": 1655
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018129932860516126,
+ "loss": 1.9294,
+ "step": 1656
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018127733645453348,
+ "loss": 2.0716,
+ "step": 1657
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018125533271553043,
+ "loss": 1.57,
+ "step": 1658
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018123331739128938,
+ "loss": 2.2132,
+ "step": 1659
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018121129048494922,
+ "loss": 1.9006,
+ "step": 1660
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018118925199965048,
+ "loss": 1.9319,
+ "step": 1661
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018116720193853543,
+ "loss": 1.8103,
+ "step": 1662
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018114514030474787,
+ "loss": 1.7028,
+ "step": 1663
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018112306710143334,
+ "loss": 1.802,
+ "step": 1664
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.000181100982331739,
+ "loss": 1.6835,
+ "step": 1665
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.0001810788859988136,
+ "loss": 1.7223,
+ "step": 1666
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810567781058077,
+ "loss": 1.5829,
+ "step": 1667
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018103465865587333,
+ "loss": 1.9863,
+ "step": 1668
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810125276521642,
+ "loss": 1.6398,
+ "step": 1669
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018099038509783582,
+ "loss": 1.9261,
+ "step": 1670
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018096823099604517,
+ "loss": 1.8882,
+ "step": 1671
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018094606534995093,
+ "loss": 1.6716,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "eval_loss": 2.075261354446411,
+ "eval_runtime": 283.9438,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018092388816271345,
+ "loss": 1.6688,
+ "step": 1673
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018090169943749476,
+ "loss": 1.9127,
+ "step": 1674
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001808794991774584,
+ "loss": 1.7214,
+ "step": 1675
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018085728738576973,
+ "loss": 1.785,
+ "step": 1676
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018083506406559561,
+ "loss": 1.5287,
+ "step": 1677
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018081282922010464,
+ "loss": 1.9012,
+ "step": 1678
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018079058285246698,
+ "loss": 1.3094,
+ "step": 1679
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807683249658545,
+ "loss": 1.818,
+ "step": 1680
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807460555634407,
+ "loss": 1.9389,
+ "step": 1681
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807237746484007,
+ "loss": 1.4334,
+ "step": 1682
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018070148222391126,
+ "loss": 1.5422,
+ "step": 1683
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806791782931508,
+ "loss": 1.7899,
+ "step": 1684
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806568628592994,
+ "loss": 1.6106,
+ "step": 1685
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018063453592553872,
+ "loss": 1.9807,
+ "step": 1686
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806121974950521,
+ "loss": 1.1762,
+ "step": 1687
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018058984757102456,
+ "loss": 1.8338,
+ "step": 1688
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001805674861566426,
+ "loss": 1.5556,
+ "step": 1689
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805451132550946,
+ "loss": 0.87,
+ "step": 1690
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018052272886957038,
+ "loss": 1.0386,
+ "step": 1691
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805003330032615,
+ "loss": 0.8153,
+ "step": 1692
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018047792565936102,
+ "loss": 1.1745,
+ "step": 1693
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018045550684106388,
+ "loss": 1.1584,
+ "step": 1694
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018043307655156644,
+ "loss": 1.0742,
+ "step": 1695
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018041063479406675,
+ "loss": 1.0537,
+ "step": 1696
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803881815717646,
+ "loss": 1.0239,
+ "step": 1697
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803657168878612,
+ "loss": 0.9182,
+ "step": 1698
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018034324074555965,
+ "loss": 1.1856,
+ "step": 1699
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018032075314806448,
+ "loss": 1.3285,
+ "step": 1700
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018029825409858198,
+ "loss": 1.2912,
+ "step": 1701
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018027574360032,
+ "loss": 1.3666,
+ "step": 1702
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018025322165648807,
+ "loss": 0.9621,
+ "step": 1703
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018023068827029723,
+ "loss": 0.8484,
+ "step": 1704
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018020814344496037,
+ "loss": 1.2236,
+ "step": 1705
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018018558718369186,
+ "loss": 0.8155,
+ "step": 1706
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.0001801630194897077,
+ "loss": 1.2047,
+ "step": 1707
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018014044036622555,
+ "loss": 1.0269,
+ "step": 1708
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018011784981646474,
+ "loss": 1.0536,
+ "step": 1709
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018009524784364615,
+ "loss": 1.0516,
+ "step": 1710
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018007263445099235,
+ "loss": 0.9087,
+ "step": 1711
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001800500096417275,
+ "loss": 1.3057,
+ "step": 1712
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018002737341907743,
+ "loss": 0.8791,
+ "step": 1713
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018000472578626956,
+ "loss": 1.1667,
+ "step": 1714
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017998206674653294,
+ "loss": 1.1026,
+ "step": 1715
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017995939630309826,
+ "loss": 1.3228,
+ "step": 1716
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001799367144591978,
+ "loss": 0.9173,
+ "step": 1717
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017991402121806557,
+ "loss": 1.0067,
+ "step": 1718
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798913165829371,
+ "loss": 1.0256,
+ "step": 1719
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017986860055704953,
+ "loss": 0.7645,
+ "step": 1720
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798458731436417,
+ "loss": 1.0567,
+ "step": 1721
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017982313434595406,
+ "loss": 0.7465,
+ "step": 1722
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017980038416722863,
+ "loss": 1.3268,
+ "step": 1723
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017977762261070916,
+ "loss": 0.9917,
+ "step": 1724
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017975484967964087,
+ "loss": 0.8592,
+ "step": 1725
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017973206537727073,
+ "loss": 1.43,
+ "step": 1726
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017970926970684725,
+ "loss": 1.3679,
+ "step": 1727
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017968646267162063,
+ "loss": 1.2959,
+ "step": 1728
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017966364427484267,
+ "loss": 1.0674,
+ "step": 1729
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017964081451976672,
+ "loss": 1.1153,
+ "step": 1730
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017961797340964783,
+ "loss": 1.0586,
+ "step": 1731
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017959512094774266,
+ "loss": 1.2388,
+ "step": 1732
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017957225713730949,
+ "loss": 1.257,
+ "step": 1733
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.0001795493819816081,
+ "loss": 1.099,
+ "step": 1734
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001795264954839001,
+ "loss": 0.9532,
+ "step": 1735
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017950359764744859,
+ "loss": 1.2553,
+ "step": 1736
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017948068847551825,
+ "loss": 0.9973,
+ "step": 1737
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017945776797137543,
+ "loss": 1.0637,
+ "step": 1738
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017943483613828815,
+ "loss": 1.1815,
+ "step": 1739
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017941189297952597,
+ "loss": 0.8378,
+ "step": 1740
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017938893849836002,
+ "loss": 0.9375,
+ "step": 1741
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017936597269806322,
+ "loss": 0.9653,
+ "step": 1742
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001793429955819099,
+ "loss": 1.221,
+ "step": 1743
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017932000715317612,
+ "loss": 1.041,
+ "step": 1744
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017929700741513955,
+ "loss": 1.0724,
+ "step": 1745
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017927399637107945,
+ "loss": 1.1102,
+ "step": 1746
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017925097402427667,
+ "loss": 0.8542,
+ "step": 1747
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001792279403780137,
+ "loss": 1.2339,
+ "step": 1748
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017920489543557465,
+ "loss": 0.8671,
+ "step": 1749
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791818392002452,
+ "loss": 0.9779,
+ "step": 1750
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791587716753127,
+ "loss": 1.1242,
+ "step": 1751
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017913569286406603,
+ "loss": 0.9043,
+ "step": 1752
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001791126027697958,
+ "loss": 0.7996,
+ "step": 1753
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017908950139579406,
+ "loss": 0.8602,
+ "step": 1754
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017906638874535462,
+ "loss": 1.0161,
+ "step": 1755
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017904326482177284,
+ "loss": 0.8226,
+ "step": 1756
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017902012962834566,
+ "loss": 1.3885,
+ "step": 1757
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001789969831683717,
+ "loss": 1.2158,
+ "step": 1758
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017897382544515108,
+ "loss": 1.3261,
+ "step": 1759
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017895065646198567,
+ "loss": 1.2144,
+ "step": 1760
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017892747622217875,
+ "loss": 0.9881,
+ "step": 1761
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001789042847290354,
+ "loss": 1.0342,
+ "step": 1762
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017888108198586217,
+ "loss": 0.7883,
+ "step": 1763
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017885786799596732,
+ "loss": 0.9006,
+ "step": 1764
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017883464276266064,
+ "loss": 1.3695,
+ "step": 1765
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001788114062892535,
+ "loss": 1.0303,
+ "step": 1766
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017878815857905897,
+ "loss": 1.3816,
+ "step": 1767
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001787648996353916,
+ "loss": 0.8684,
+ "step": 1768
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017874162946156772,
+ "loss": 1.1157,
+ "step": 1769
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017871834806090501,
+ "loss": 1.0087,
+ "step": 1770
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.000178695055436723,
+ "loss": 0.7173,
+ "step": 1771
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017867175159234265,
+ "loss": 1.4784,
+ "step": 1772
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017864843653108662,
+ "loss": 1.1401,
+ "step": 1773
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786251102562791,
+ "loss": 1.0952,
+ "step": 1774
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786017727712459,
+ "loss": 0.9443,
+ "step": 1775
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017857842407931445,
+ "loss": 1.0682,
+ "step": 1776
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785550641838138,
+ "loss": 0.9402,
+ "step": 1777
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017853169308807448,
+ "loss": 1.0576,
+ "step": 1778
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785083107954288,
+ "loss": 1.1425,
+ "step": 1779
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017848491730921046,
+ "loss": 1.1402,
+ "step": 1780
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017846151263275494,
+ "loss": 1.4482,
+ "step": 1781
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017843809676939922,
+ "loss": 0.7765,
+ "step": 1782
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017841466972248188,
+ "loss": 1.1478,
+ "step": 1783
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001783912314953431,
+ "loss": 1.1876,
+ "step": 1784
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017836778209132464,
+ "loss": 1.2036,
+ "step": 1785
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783443215137699,
+ "loss": 1.0297,
+ "step": 1786
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783208497660239,
+ "loss": 0.8186,
+ "step": 1787
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017829736685143308,
+ "loss": 0.7258,
+ "step": 1788
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017827387277334568,
+ "loss": 0.8072,
+ "step": 1789
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017825036753511144,
+ "loss": 1.0474,
+ "step": 1790
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017822685114008167,
+ "loss": 1.2141,
+ "step": 1791
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017820332359160928,
+ "loss": 1.1443,
+ "step": 1792
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001781797848930488,
+ "loss": 0.9864,
+ "step": 1793
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017815623504775636,
+ "loss": 1.2998,
+ "step": 1794
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001781326740590896,
+ "loss": 1.0672,
+ "step": 1795
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017810910193040785,
+ "loss": 0.9152,
+ "step": 1796
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.000178085518665072,
+ "loss": 1.2555,
+ "step": 1797
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017806192426644444,
+ "loss": 1.2085,
+ "step": 1798
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017803831873788926,
+ "loss": 1.6205,
+ "step": 1799
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001780147020827721,
+ "loss": 1.3382,
+ "step": 1800
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017799107430446016,
+ "loss": 1.3309,
+ "step": 1801
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017796743540632223,
+ "loss": 1.2556,
+ "step": 1802
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017794378539172877,
+ "loss": 0.829,
+ "step": 1803
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017792012426405166,
+ "loss": 1.1711,
+ "step": 1804
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017789645202666456,
+ "loss": 1.0128,
+ "step": 1805
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017787276868294253,
+ "loss": 1.2074,
+ "step": 1806
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017784907423626237,
+ "loss": 1.0996,
+ "step": 1807
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778253686900023,
+ "loss": 0.9608,
+ "step": 1808
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778016520475423,
+ "loss": 0.827,
+ "step": 1809
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017777792431226383,
+ "loss": 1.2365,
+ "step": 1810
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017775418548754993,
+ "loss": 1.0276,
+ "step": 1811
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777304355767852,
+ "loss": 0.8178,
+ "step": 1812
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777066745833559,
+ "loss": 1.1297,
+ "step": 1813
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017768290251064987,
+ "loss": 1.1737,
+ "step": 1814
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017765911936205644,
+ "loss": 1.1606,
+ "step": 1815
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017763532514096658,
+ "loss": 1.2605,
+ "step": 1816
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001776115198507728,
+ "loss": 1.2271,
+ "step": 1817
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017758770349486923,
+ "loss": 0.9407,
+ "step": 1818
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001775638760766516,
+ "loss": 1.0273,
+ "step": 1819
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017754003759951715,
+ "loss": 1.0746,
+ "step": 1820
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017751618806686472,
+ "loss": 1.0091,
+ "step": 1821
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017749232748209473,
+ "loss": 0.997,
+ "step": 1822
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001774684558486092,
+ "loss": 1.4814,
+ "step": 1823
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017744457316981168,
+ "loss": 1.1407,
+ "step": 1824
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017742067944910737,
+ "loss": 0.9824,
+ "step": 1825
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017739677468990293,
+ "loss": 1.2603,
+ "step": 1826
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017737285889560668,
+ "loss": 1.3721,
+ "step": 1827
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017734893206962853,
+ "loss": 1.1186,
+ "step": 1828
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017732499421537984,
+ "loss": 0.7693,
+ "step": 1829
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001773010453362737,
+ "loss": 1.0449,
+ "step": 1830
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017727708543572467,
+ "loss": 0.9331,
+ "step": 1831
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001772531145171489,
+ "loss": 0.739,
+ "step": 1832
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017722913258396417,
+ "loss": 0.9076,
+ "step": 1833
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017720513963958968,
+ "loss": 1.3464,
+ "step": 1834
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017718113568744638,
+ "loss": 0.8858,
+ "step": 1835
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017715712073095672,
+ "loss": 1.3204,
+ "step": 1836
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017713309477354467,
+ "loss": 1.0538,
+ "step": 1837
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001771090578186358,
+ "loss": 1.44,
+ "step": 1838
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770850098696573,
+ "loss": 1.0167,
+ "step": 1839
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017706095093003785,
+ "loss": 0.9724,
+ "step": 1840
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017703688100320774,
+ "loss": 0.8055,
+ "step": 1841
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770128000925988,
+ "loss": 0.7363,
+ "step": 1842
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017698870820164446,
+ "loss": 1.1329,
+ "step": 1843
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017696460533377968,
+ "loss": 0.9487,
+ "step": 1844
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017694049149244104,
+ "loss": 1.2571,
+ "step": 1845
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001769163666810666,
+ "loss": 0.9148,
+ "step": 1846
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017689223090309607,
+ "loss": 1.4676,
+ "step": 1847
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017686808416197072,
+ "loss": 0.9395,
+ "step": 1848
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017684392646113325,
+ "loss": 0.9632,
+ "step": 1849
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017681975780402807,
+ "loss": 1.0037,
+ "step": 1850
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001767955781941011,
+ "loss": 0.9557,
+ "step": 1851
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017677138763479985,
+ "loss": 1.2799,
+ "step": 1852
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017674718612957336,
+ "loss": 0.8461,
+ "step": 1853
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001767229736818722,
+ "loss": 1.2762,
+ "step": 1854
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017669875029514856,
+ "loss": 1.4801,
+ "step": 1855
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017667451597285617,
+ "loss": 0.9849,
+ "step": 1856
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766502707184503,
+ "loss": 1.0875,
+ "step": 1857
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017662601453538783,
+ "loss": 0.8346,
+ "step": 1858
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766017474271271,
+ "loss": 1.1933,
+ "step": 1859
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017657746939712815,
+ "loss": 0.8789,
+ "step": 1860
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017655318044885245,
+ "loss": 1.0091,
+ "step": 1861
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0001765288805857631,
+ "loss": 0.7371,
+ "step": 1862
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017650456981132466,
+ "loss": 0.8131,
+ "step": 1863
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017648024812900342,
+ "loss": 1.0795,
+ "step": 1864
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.000176455915542267,
+ "loss": 0.9882,
+ "step": 1865
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017643157205458483,
+ "loss": 1.212,
+ "step": 1866
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017640721766942768,
+ "loss": 1.4755,
+ "step": 1867
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017638285239026798,
+ "loss": 1.0391,
+ "step": 1868
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017635847622057965,
+ "loss": 1.2568,
+ "step": 1869
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017633408916383826,
+ "loss": 1.2138,
+ "step": 1870
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001763096912235208,
+ "loss": 1.196,
+ "step": 1871
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017628528240310596,
+ "loss": 1.1476,
+ "step": 1872
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017626086270607384,
+ "loss": 1.1421,
+ "step": 1873
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017623643213590619,
+ "loss": 1.0711,
+ "step": 1874
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001762119906960863,
+ "loss": 0.8842,
+ "step": 1875
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017618753839009893,
+ "loss": 0.798,
+ "step": 1876
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761630752214305,
+ "loss": 0.8591,
+ "step": 1877
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017613860119356883,
+ "loss": 0.7646,
+ "step": 1878
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761141163100035,
+ "loss": 1.4113,
+ "step": 1879
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017608962057422549,
+ "loss": 0.8605,
+ "step": 1880
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017606511398972731,
+ "loss": 0.6179,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "eval_loss": 2.3971996307373047,
+ "eval_runtime": 283.7444,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760405965600031,
+ "loss": 0.8651,
+ "step": 1882
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760160682885485,
+ "loss": 1.3178,
+ "step": 1883
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017599152917886071,
+ "loss": 0.9233,
+ "step": 1884
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017596697923443847,
+ "loss": 0.9126,
+ "step": 1885
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001759424184587821,
+ "loss": 0.9749,
+ "step": 1886
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017591784685539334,
+ "loss": 1.1929,
+ "step": 1887
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017589326442777565,
+ "loss": 1.2026,
+ "step": 1888
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017586867117943392,
+ "loss": 1.1162,
+ "step": 1889
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017584406711387463,
+ "loss": 0.9818,
+ "step": 1890
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001758194522346057,
+ "loss": 0.9802,
+ "step": 1891
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001757948265451368,
+ "loss": 0.8963,
+ "step": 1892
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017577019004897897,
+ "loss": 1.0359,
+ "step": 1893
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017574554274964478,
+ "loss": 1.0788,
+ "step": 1894
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017572088465064848,
+ "loss": 0.9415,
+ "step": 1895
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756962157555057,
+ "loss": 1.0944,
+ "step": 1896
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017567153606773373,
+ "loss": 1.357,
+ "step": 1897
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017564684559085136,
+ "loss": 1.0108,
+ "step": 1898
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756221443283789,
+ "loss": 0.5337,
+ "step": 1899
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755974322838382,
+ "loss": 1.4234,
+ "step": 1900
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755727094607527,
+ "loss": 0.9083,
+ "step": 1901
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017554797586264727,
+ "loss": 0.9199,
+ "step": 1902
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017552323149304844,
+ "loss": 1.1885,
+ "step": 1903
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754984763554842,
+ "loss": 1.276,
+ "step": 1904
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754737104534841,
+ "loss": 0.8882,
+ "step": 1905
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017544893379057918,
+ "loss": 0.993,
+ "step": 1906
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754241463703021,
+ "loss": 1.261,
+ "step": 1907
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017539934819618696,
+ "loss": 0.9877,
+ "step": 1908
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017537453927176947,
+ "loss": 0.9991,
+ "step": 1909
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017534971960058685,
+ "loss": 1.2012,
+ "step": 1910
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001753248891861778,
+ "loss": 0.864,
+ "step": 1911
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017530004803208263,
+ "loss": 1.0382,
+ "step": 1912
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017527519614184316,
+ "loss": 1.068,
+ "step": 1913
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017525033351900268,
+ "loss": 0.8687,
+ "step": 1914
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752254601671061,
+ "loss": 1.1174,
+ "step": 1915
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752005760896998,
+ "loss": 1.269,
+ "step": 1916
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751756812903317,
+ "loss": 0.7387,
+ "step": 1917
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751507757725513,
+ "loss": 0.8484,
+ "step": 1918
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001751258595399095,
+ "loss": 1.0092,
+ "step": 1919
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017510093259595885,
+ "loss": 1.0145,
+ "step": 1920
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017507599494425344,
+ "loss": 1.2969,
+ "step": 1921
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017505104658834875,
+ "loss": 0.7925,
+ "step": 1922
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017502608753180196,
+ "loss": 0.8974,
+ "step": 1923
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017500111777817164,
+ "loss": 0.764,
+ "step": 1924
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001749761373310179,
+ "loss": 1.1057,
+ "step": 1925
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017495114619390246,
+ "loss": 0.8092,
+ "step": 1926
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017492614437038845,
+ "loss": 0.9553,
+ "step": 1927
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017490113186404067,
+ "loss": 1.0278,
+ "step": 1928
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748761086784253,
+ "loss": 1.2152,
+ "step": 1929
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017485107481711012,
+ "loss": 1.5154,
+ "step": 1930
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748260302836644,
+ "loss": 1.1973,
+ "step": 1931
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017480097508165896,
+ "loss": 0.9429,
+ "step": 1932
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747759092146661,
+ "loss": 1.5453,
+ "step": 1933
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747508326862597,
+ "loss": 1.1691,
+ "step": 1934
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017472574550001508,
+ "loss": 1.2094,
+ "step": 1935
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017470064765950918,
+ "loss": 1.0777,
+ "step": 1936
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017467553916832035,
+ "loss": 1.0883,
+ "step": 1937
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017465042003002857,
+ "loss": 0.9297,
+ "step": 1938
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017462529024821522,
+ "loss": 0.7814,
+ "step": 1939
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017460014982646334,
+ "loss": 1.3645,
+ "step": 1940
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.0001745749987683573,
+ "loss": 1.0604,
+ "step": 1941
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017454983707748317,
+ "loss": 0.9416,
+ "step": 1942
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017452466475742845,
+ "loss": 1.4187,
+ "step": 1943
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017449948181178215,
+ "loss": 1.1619,
+ "step": 1944
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017447428824413482,
+ "loss": 1.1381,
+ "step": 1945
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017444908405807845,
+ "loss": 1.2304,
+ "step": 1946
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.0001744238692572067,
+ "loss": 1.2149,
+ "step": 1947
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017439864384511463,
+ "loss": 0.8172,
+ "step": 1948
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017437340782539877,
+ "loss": 1.0783,
+ "step": 1949
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017434816120165728,
+ "loss": 1.0661,
+ "step": 1950
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017432290397748982,
+ "loss": 1.1959,
+ "step": 1951
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001742976361564974,
+ "loss": 1.0581,
+ "step": 1952
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017427235774228274,
+ "loss": 0.8948,
+ "step": 1953
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017424706873845,
+ "loss": 1.2565,
+ "step": 1954
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017422176914860476,
+ "loss": 0.9237,
+ "step": 1955
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017419645897635432,
+ "loss": 1.219,
+ "step": 1956
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017417113822530727,
+ "loss": 1.4606,
+ "step": 1957
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017414580689907377,
+ "loss": 0.714,
+ "step": 1958
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001741204650012656,
+ "loss": 1.2223,
+ "step": 1959
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017409511253549593,
+ "loss": 0.9828,
+ "step": 1960
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017406974950537942,
+ "loss": 0.9954,
+ "step": 1961
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017404437591453235,
+ "loss": 1.0307,
+ "step": 1962
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001740189917665724,
+ "loss": 0.9331,
+ "step": 1963
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739935970651188,
+ "loss": 1.3517,
+ "step": 1964
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017396819181379232,
+ "loss": 1.2024,
+ "step": 1965
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739427760162151,
+ "loss": 0.9696,
+ "step": 1966
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017391734967601102,
+ "loss": 1.1559,
+ "step": 1967
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001738919127968052,
+ "loss": 1.3104,
+ "step": 1968
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017386646538222443,
+ "loss": 0.9073,
+ "step": 1969
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017384100743589697,
+ "loss": 1.0539,
+ "step": 1970
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017381553896145255,
+ "loss": 0.9873,
+ "step": 1971
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737900599625224,
+ "loss": 0.9466,
+ "step": 1972
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737645704427393,
+ "loss": 1.0639,
+ "step": 1973
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737390704057375,
+ "loss": 0.5843,
+ "step": 1974
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017371355985515275,
+ "loss": 1.1318,
+ "step": 1975
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017368803879462227,
+ "loss": 1.0116,
+ "step": 1976
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001736625072277848,
+ "loss": 0.8845,
+ "step": 1977
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017363696515828062,
+ "loss": 0.8081,
+ "step": 1978
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017361141258975148,
+ "loss": 0.8795,
+ "step": 1979
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735858495258406,
+ "loss": 0.9725,
+ "step": 1980
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735602759701927,
+ "loss": 1.0164,
+ "step": 1981
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017353469192645405,
+ "loss": 1.2937,
+ "step": 1982
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735090973982723,
+ "loss": 1.0842,
+ "step": 1983
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017348349238929678,
+ "loss": 1.0043,
+ "step": 1984
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017345787690317815,
+ "loss": 1.1302,
+ "step": 1985
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017343225094356855,
+ "loss": 1.195,
+ "step": 1986
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017340661451412183,
+ "loss": 1.1449,
+ "step": 1987
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017338096761849309,
+ "loss": 1.2244,
+ "step": 1988
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017335531026033897,
+ "loss": 0.9273,
+ "step": 1989
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017332964244331776,
+ "loss": 1.0448,
+ "step": 1990
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017330396417108908,
+ "loss": 1.0074,
+ "step": 1991
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017327827544731412,
+ "loss": 0.9284,
+ "step": 1992
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.0001732525762756555,
+ "loss": 1.0307,
+ "step": 1993
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017322686665977737,
+ "loss": 1.1526,
+ "step": 1994
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017320114660334535,
+ "loss": 0.819,
+ "step": 1995
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017317541611002656,
+ "loss": 1.1029,
+ "step": 1996
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017314967518348962,
+ "loss": 1.2471,
+ "step": 1997
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017312392382740462,
+ "loss": 1.0156,
+ "step": 1998
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017309816204544317,
+ "loss": 1.1843,
+ "step": 1999
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017307238984127832,
+ "loss": 1.1588,
+ "step": 2000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017304660721858457,
+ "loss": 1.0157,
+ "step": 2001
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.000173020814181038,
+ "loss": 1.0563,
+ "step": 2002
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017299501073231622,
+ "loss": 1.1883,
+ "step": 2003
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017296919687609808,
+ "loss": 0.9404,
+ "step": 2004
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017294337261606417,
+ "loss": 1.2495,
+ "step": 2005
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017291753795589643,
+ "loss": 1.0074,
+ "step": 2006
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017289169289927837,
+ "loss": 1.1411,
+ "step": 2007
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017286583744989488,
+ "loss": 0.9942,
+ "step": 2008
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017283997161143239,
+ "loss": 0.952,
+ "step": 2009
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017281409538757883,
+ "loss": 1.2966,
+ "step": 2010
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017278820878202357,
+ "loss": 1.0836,
+ "step": 2011
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727623117984575,
+ "loss": 1.0984,
+ "step": 2012
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727364044405729,
+ "loss": 0.8822,
+ "step": 2013
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017271048671206366,
+ "loss": 1.2014,
+ "step": 2014
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017268455861662503,
+ "loss": 1.1779,
+ "step": 2015
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017265862015795384,
+ "loss": 0.9966,
+ "step": 2016
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017263267133974832,
+ "loss": 0.9536,
+ "step": 2017
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017260671216570822,
+ "loss": 0.811,
+ "step": 2018
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017258074263953472,
+ "loss": 0.8241,
+ "step": 2019
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017255476276493056,
+ "loss": 1.1263,
+ "step": 2020
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017252877254559986,
+ "loss": 0.995,
+ "step": 2021
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001725027719852483,
+ "loss": 1.1481,
+ "step": 2022
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001724767610875829,
+ "loss": 1.129,
+ "step": 2023
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017245073985631238,
+ "loss": 0.5928,
+ "step": 2024
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017242470829514672,
+ "loss": 0.8326,
+ "step": 2025
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017239866640779745,
+ "loss": 1.1092,
+ "step": 2026
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017237261419797756,
+ "loss": 1.5015,
+ "step": 2027
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001723465516694016,
+ "loss": 0.9775,
+ "step": 2028
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017232047882578548,
+ "loss": 0.9348,
+ "step": 2029
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001722943956708466,
+ "loss": 0.6199,
+ "step": 2030
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017226830220830384,
+ "loss": 1.1485,
+ "step": 2031
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017224219844187764,
+ "loss": 1.1195,
+ "step": 2032
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017221608437528973,
+ "loss": 1.0528,
+ "step": 2033
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017218996001226345,
+ "loss": 1.1058,
+ "step": 2034
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017216382535652355,
+ "loss": 1.1451,
+ "step": 2035
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001721376804117963,
+ "loss": 1.2251,
+ "step": 2036
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017211152518180936,
+ "loss": 1.0708,
+ "step": 2037
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017208535967029188,
+ "loss": 1.0746,
+ "step": 2038
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017205918388097456,
+ "loss": 1.3262,
+ "step": 2039
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017203299781758943,
+ "loss": 0.7619,
+ "step": 2040
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017200680148387007,
+ "loss": 1.01,
+ "step": 2041
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001719805948835515,
+ "loss": 1.1651,
+ "step": 2042
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017195437802037026,
+ "loss": 1.4671,
+ "step": 2043
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017192815089806424,
+ "loss": 0.9857,
+ "step": 2044
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001719019135203729,
+ "loss": 1.2613,
+ "step": 2045
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017187566589103704,
+ "loss": 1.4386,
+ "step": 2046
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001718494080137991,
+ "loss": 1.0965,
+ "step": 2047
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017182313989240285,
+ "loss": 0.752,
+ "step": 2048
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017179686153059352,
+ "loss": 0.9126,
+ "step": 2049
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017177057293211784,
+ "loss": 1.5075,
+ "step": 2050
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.000171744274100724,
+ "loss": 1.0407,
+ "step": 2051
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017171796504016166,
+ "loss": 0.8263,
+ "step": 2052
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.0001716916457541819,
+ "loss": 0.9453,
+ "step": 2053
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017166531624653722,
+ "loss": 0.9777,
+ "step": 2054
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017163897652098172,
+ "loss": 1.2129,
+ "step": 2055
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017161262658127086,
+ "loss": 1.3642,
+ "step": 2056
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017158626643116152,
+ "loss": 0.6798,
+ "step": 2057
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017155989607441213,
+ "loss": 0.874,
+ "step": 2058
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017153351551478247,
+ "loss": 1.0636,
+ "step": 2059
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001715071247560339,
+ "loss": 1.0563,
+ "step": 2060
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714807238019291,
+ "loss": 1.1984,
+ "step": 2061
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017145431265623234,
+ "loss": 0.9444,
+ "step": 2062
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714278913227092,
+ "loss": 0.7809,
+ "step": 2063
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017140145980512684,
+ "loss": 1.649,
+ "step": 2064
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713750181072538,
+ "loss": 1.0956,
+ "step": 2065
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713485662328601,
+ "loss": 1.2845,
+ "step": 2066
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017132210418571714,
+ "loss": 1.0484,
+ "step": 2067
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017129563196959793,
+ "loss": 1.0291,
+ "step": 2068
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017126914958827679,
+ "loss": 1.1226,
+ "step": 2069
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.0001712426570455295,
+ "loss": 1.0119,
+ "step": 2070
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017121615434513332,
+ "loss": 1.1663,
+ "step": 2071
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.000171189641490867,
+ "loss": 1.1353,
+ "step": 2072
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017116311848651064,
+ "loss": 1.0761,
+ "step": 2073
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017113658533584594,
+ "loss": 1.1978,
+ "step": 2074
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017111004204265582,
+ "loss": 1.3881,
+ "step": 2075
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017108348861072484,
+ "loss": 1.3945,
+ "step": 2076
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017105692504383897,
+ "loss": 1.3796,
+ "step": 2077
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017103035134578555,
+ "loss": 1.1721,
+ "step": 2078
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001710037675203534,
+ "loss": 1.0061,
+ "step": 2079
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017097717357133284,
+ "loss": 1.2456,
+ "step": 2080
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017095056950251555,
+ "loss": 0.788,
+ "step": 2081
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001709239553176947,
+ "loss": 1.16,
+ "step": 2082
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001708973310206649,
+ "loss": 1.0498,
+ "step": 2083
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017087069661522218,
+ "loss": 0.8993,
+ "step": 2084
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017084405210516406,
+ "loss": 1.2088,
+ "step": 2085
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001708173974942894,
+ "loss": 1.0897,
+ "step": 2086
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017079073278639863,
+ "loss": 1.2718,
+ "step": 2087
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017076405798529355,
+ "loss": 1.2325,
+ "step": 2088
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017073737309477736,
+ "loss": 1.0555,
+ "step": 2089
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017071067811865476,
+ "loss": 1.1428,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 2.3191208839416504,
+ "eval_runtime": 284.1375,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706839730607319,
+ "loss": 1.0908,
+ "step": 2091
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706572579248163,
+ "loss": 1.2092,
+ "step": 2092
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.000170630532714717,
+ "loss": 1.1735,
+ "step": 2093
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001706037974342444,
+ "loss": 1.2716,
+ "step": 2094
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017057705208721035,
+ "loss": 1.0095,
+ "step": 2095
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001705502966774282,
+ "loss": 1.3059,
+ "step": 2096
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017052353120871266,
+ "loss": 0.8269,
+ "step": 2097
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001704967556848799,
+ "loss": 1.0615,
+ "step": 2098
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017046997010974755,
+ "loss": 1.2709,
+ "step": 2099
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017044317448713461,
+ "loss": 1.1633,
+ "step": 2100
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017041636882086158,
+ "loss": 0.9273,
+ "step": 2101
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017038955311475038,
+ "loss": 1.3117,
+ "step": 2102
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001703627273726243,
+ "loss": 0.8883,
+ "step": 2103
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017033589159830815,
+ "loss": 1.1371,
+ "step": 2104
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017030904579562806,
+ "loss": 1.5402,
+ "step": 2105
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017028218996841172,
+ "loss": 0.9156,
+ "step": 2106
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017025532412048817,
+ "loss": 1.0962,
+ "step": 2107
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001702284482556879,
+ "loss": 0.9402,
+ "step": 2108
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017020156237784279,
+ "loss": 0.8146,
+ "step": 2109
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001701746664907862,
+ "loss": 1.1718,
+ "step": 2110
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017014776059835288,
+ "loss": 1.0618,
+ "step": 2111
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017012084470437907,
+ "loss": 1.4796,
+ "step": 2112
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017009391881270237,
+ "loss": 0.8402,
+ "step": 2113
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017006698292716178,
+ "loss": 1.1641,
+ "step": 2114
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001700400370515978,
+ "loss": 1.241,
+ "step": 2115
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017001308118985237,
+ "loss": 0.8683,
+ "step": 2116
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00016998611534576873,
+ "loss": 1.2697,
+ "step": 2117
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016995913952319168,
+ "loss": 0.9233,
+ "step": 2118
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016993215372596737,
+ "loss": 1.2472,
+ "step": 2119
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016990515795794334,
+ "loss": 1.2541,
+ "step": 2120
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016987815222296865,
+ "loss": 1.0016,
+ "step": 2121
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016985113652489374,
+ "loss": 1.0678,
+ "step": 2122
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016982411086757037,
+ "loss": 1.6066,
+ "step": 2123
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016979707525485192,
+ "loss": 1.229,
+ "step": 2124
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016977002969059302,
+ "loss": 0.752,
+ "step": 2125
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016974297417864977,
+ "loss": 0.8752,
+ "step": 2126
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001697159087228797,
+ "loss": 0.8896,
+ "step": 2127
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016968883332714186,
+ "loss": 0.9657,
+ "step": 2128
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696617479952964,
+ "loss": 1.3657,
+ "step": 2129
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696346527312053,
+ "loss": 0.9876,
+ "step": 2130
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016960754753873162,
+ "loss": 1.0165,
+ "step": 2131
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016958043242174003,
+ "loss": 1.625,
+ "step": 2132
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016955330738409655,
+ "loss": 1.5502,
+ "step": 2133
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016952617242966864,
+ "loss": 1.0793,
+ "step": 2134
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016949902756232507,
+ "loss": 1.4425,
+ "step": 2135
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016947187278593622,
+ "loss": 1.3124,
+ "step": 2136
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016944470810437365,
+ "loss": 0.927,
+ "step": 2137
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016941753352151055,
+ "loss": 1.1911,
+ "step": 2138
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016939034904122138,
+ "loss": 1.0768,
+ "step": 2139
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016936315466738205,
+ "loss": 1.1277,
+ "step": 2140
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016933595040386984,
+ "loss": 0.812,
+ "step": 2141
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.0001693087362545636,
+ "loss": 0.8299,
+ "step": 2142
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016928151222334338,
+ "loss": 1.1125,
+ "step": 2143
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016925427831409077,
+ "loss": 1.1835,
+ "step": 2144
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016922703453068873,
+ "loss": 1.2007,
+ "step": 2145
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016919978087702163,
+ "loss": 0.8524,
+ "step": 2146
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016917251735697523,
+ "loss": 0.9497,
+ "step": 2147
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016914524397443673,
+ "loss": 1.1004,
+ "step": 2148
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016911796073329466,
+ "loss": 0.8347,
+ "step": 2149
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016909066763743912,
+ "loss": 0.9492,
+ "step": 2150
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016906336469076148,
+ "loss": 1.1406,
+ "step": 2151
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016903605189715447,
+ "loss": 1.0137,
+ "step": 2152
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001690087292605124,
+ "loss": 1.0624,
+ "step": 2153
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016898139678473076,
+ "loss": 1.1767,
+ "step": 2154
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001689540544737067,
+ "loss": 1.4184,
+ "step": 2155
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016892670233133856,
+ "loss": 0.957,
+ "step": 2156
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016889934036152618,
+ "loss": 1.0399,
+ "step": 2157
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016887196856817073,
+ "loss": 1.2009,
+ "step": 2158
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016884458695517495,
+ "loss": 1.3977,
+ "step": 2159
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016881719552644273,
+ "loss": 1.1328,
+ "step": 2160
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016878979428587955,
+ "loss": 1.5007,
+ "step": 2161
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016876238323739221,
+ "loss": 1.1248,
+ "step": 2162
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016873496238488899,
+ "loss": 1.0358,
+ "step": 2163
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016870753173227945,
+ "loss": 1.2961,
+ "step": 2164
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016868009128347459,
+ "loss": 0.9435,
+ "step": 2165
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016865264104238683,
+ "loss": 0.9642,
+ "step": 2166
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016862518101293,
+ "loss": 1.0169,
+ "step": 2167
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016859771119901929,
+ "loss": 1.0904,
+ "step": 2168
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.0001685702316045713,
+ "loss": 1.3178,
+ "step": 2169
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016854274223350397,
+ "loss": 1.1395,
+ "step": 2170
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016851524308973678,
+ "loss": 1.1207,
+ "step": 2171
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016848773417719044,
+ "loss": 1.3544,
+ "step": 2172
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016846021549978715,
+ "loss": 1.3503,
+ "step": 2173
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016843268706145042,
+ "loss": 1.4276,
+ "step": 2174
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016840514886610529,
+ "loss": 0.9888,
+ "step": 2175
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016837760091767802,
+ "loss": 1.0913,
+ "step": 2176
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001683500432200964,
+ "loss": 1.4781,
+ "step": 2177
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016832247577728955,
+ "loss": 1.2657,
+ "step": 2178
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.000168294898593188,
+ "loss": 0.9206,
+ "step": 2179
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682673116717236,
+ "loss": 0.9218,
+ "step": 2180
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682397150168297,
+ "loss": 1.2719,
+ "step": 2181
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016821210863244096,
+ "loss": 0.984,
+ "step": 2182
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016818449252249345,
+ "loss": 1.4641,
+ "step": 2183
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001681568666909246,
+ "loss": 1.2571,
+ "step": 2184
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016812923114167328,
+ "loss": 1.2025,
+ "step": 2185
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016810158587867973,
+ "loss": 0.9621,
+ "step": 2186
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016807393090588553,
+ "loss": 1.0016,
+ "step": 2187
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016804626622723368,
+ "loss": 1.031,
+ "step": 2188
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016801859184666857,
+ "loss": 0.7573,
+ "step": 2189
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016799090776813597,
+ "loss": 1.2694,
+ "step": 2190
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.000167963213995583,
+ "loss": 1.196,
+ "step": 2191
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016793551053295822,
+ "loss": 0.8754,
+ "step": 2192
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016790779738421152,
+ "loss": 1.1743,
+ "step": 2193
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678800745532942,
+ "loss": 1.0921,
+ "step": 2194
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016785234204415888,
+ "loss": 0.8778,
+ "step": 2195
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678245998607597,
+ "loss": 1.0528,
+ "step": 2196
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016779684800705203,
+ "loss": 1.0255,
+ "step": 2197
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001677690864869927,
+ "loss": 0.6344,
+ "step": 2198
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016774131530453992,
+ "loss": 0.8691,
+ "step": 2199
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016771353446365318,
+ "loss": 1.2061,
+ "step": 2200
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001676857439682935,
+ "loss": 1.1759,
+ "step": 2201
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016765794382242314,
+ "loss": 1.1118,
+ "step": 2202
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016763013403000584,
+ "loss": 1.3005,
+ "step": 2203
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016760231459500666,
+ "loss": 1.0415,
+ "step": 2204
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.000167574485521392,
+ "loss": 0.824,
+ "step": 2205
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016754664681312975,
+ "loss": 0.6682,
+ "step": 2206
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016751879847418905,
+ "loss": 1.9204,
+ "step": 2207
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016749094050854047,
+ "loss": 0.9931,
+ "step": 2208
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016746307292015602,
+ "loss": 0.8898,
+ "step": 2209
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016743519571300888,
+ "loss": 1.3337,
+ "step": 2210
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016740730889107383,
+ "loss": 1.2947,
+ "step": 2211
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673794124583269,
+ "loss": 1.1882,
+ "step": 2212
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673515064187455,
+ "loss": 1.5408,
+ "step": 2213
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016732359077630847,
+ "loss": 1.1273,
+ "step": 2214
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001672956655349959,
+ "loss": 0.8954,
+ "step": 2215
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016726773069878934,
+ "loss": 1.1747,
+ "step": 2216
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016723978627167173,
+ "loss": 0.807,
+ "step": 2217
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016721183225762727,
+ "loss": 1.2512,
+ "step": 2218
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016718386866064166,
+ "loss": 1.0796,
+ "step": 2219
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016715589548470185,
+ "loss": 1.0905,
+ "step": 2220
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016712791273379622,
+ "loss": 1.3779,
+ "step": 2221
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016709992041191452,
+ "loss": 1.2015,
+ "step": 2222
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016707191852304782,
+ "loss": 0.8612,
+ "step": 2223
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001670439070711886,
+ "loss": 1.1819,
+ "step": 2224
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016701588606033064,
+ "loss": 1.2715,
+ "step": 2225
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001669878554944692,
+ "loss": 1.3681,
+ "step": 2226
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016695981537760072,
+ "loss": 1.1254,
+ "step": 2227
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669317657137232,
+ "loss": 0.9476,
+ "step": 2228
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669037065068359,
+ "loss": 1.235,
+ "step": 2229
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016687563776093941,
+ "loss": 0.7356,
+ "step": 2230
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016684755948003573,
+ "loss": 0.7901,
+ "step": 2231
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016681947166812824,
+ "loss": 1.317,
+ "step": 2232
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016679137432922163,
+ "loss": 0.8832,
+ "step": 2233
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016676326746732195,
+ "loss": 1.2776,
+ "step": 2234
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016673515108643665,
+ "loss": 1.0435,
+ "step": 2235
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001667070251905745,
+ "loss": 1.0957,
+ "step": 2236
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016667888978374567,
+ "loss": 1.0862,
+ "step": 2237
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016665074486996165,
+ "loss": 1.1112,
+ "step": 2238
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001666225904532352,
+ "loss": 1.3633,
+ "step": 2239
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016659442653758064,
+ "loss": 1.444,
+ "step": 2240
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016656625312701348,
+ "loss": 0.8248,
+ "step": 2241
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016653807022555067,
+ "loss": 1.2522,
+ "step": 2242
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001665098778372104,
+ "loss": 1.2107,
+ "step": 2243
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001664816759660124,
+ "loss": 1.0813,
+ "step": 2244
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016645346461597753,
+ "loss": 1.1136,
+ "step": 2245
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016642524379112817,
+ "loss": 1.1003,
+ "step": 2246
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.000166397013495488,
+ "loss": 1.0635,
+ "step": 2247
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016636877373308204,
+ "loss": 1.0575,
+ "step": 2248
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016634052450793663,
+ "loss": 0.7693,
+ "step": 2249
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016631226582407952,
+ "loss": 1.5965,
+ "step": 2250
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001662839976855398,
+ "loss": 1.0989,
+ "step": 2251
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016625572009634787,
+ "loss": 0.9198,
+ "step": 2252
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016622743306053548,
+ "loss": 1.0896,
+ "step": 2253
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016619913658213578,
+ "loss": 1.015,
+ "step": 2254
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.0001661708306651832,
+ "loss": 0.8572,
+ "step": 2255
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016614251531371353,
+ "loss": 1.1508,
+ "step": 2256
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.000166114190531764,
+ "loss": 1.1852,
+ "step": 2257
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016608585632337306,
+ "loss": 0.932,
+ "step": 2258
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016605751269258053,
+ "loss": 1.2542,
+ "step": 2259
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016602915964342757,
+ "loss": 0.943,
+ "step": 2260
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016600079717995678,
+ "loss": 1.2438,
+ "step": 2261
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016597242530621203,
+ "loss": 0.9928,
+ "step": 2262
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016594404402623845,
+ "loss": 0.9516,
+ "step": 2263
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016591565334408265,
+ "loss": 1.1689,
+ "step": 2264
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.0001658872532637925,
+ "loss": 1.3155,
+ "step": 2265
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016585884378941725,
+ "loss": 1.1596,
+ "step": 2266
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016583042492500746,
+ "loss": 0.9956,
+ "step": 2267
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016580199667461508,
+ "loss": 0.9289,
+ "step": 2268
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016577355904229325,
+ "loss": 1.3225,
+ "step": 2269
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016574511203209667,
+ "loss": 1.0384,
+ "step": 2270
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001657166556480812,
+ "loss": 0.697,
+ "step": 2271
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016568818989430416,
+ "loss": 0.7702,
+ "step": 2272
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016565971477482404,
+ "loss": 1.1041,
+ "step": 2273
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016563123029370093,
+ "loss": 1.0462,
+ "step": 2274
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001656027364549959,
+ "loss": 1.0797,
+ "step": 2275
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001655742332627717,
+ "loss": 1.3301,
+ "step": 2276
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.0001655457207210922,
+ "loss": 1.0467,
+ "step": 2277
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016551719883402271,
+ "loss": 0.9432,
+ "step": 2278
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016548866760562978,
+ "loss": 1.1808,
+ "step": 2279
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016546012703998138,
+ "loss": 1.1094,
+ "step": 2280
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016543157714114673,
+ "loss": 1.3914,
+ "step": 2281
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016540301791319645,
+ "loss": 1.0402,
+ "step": 2282
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016537444936020246,
+ "loss": 0.9815,
+ "step": 2283
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.000165345871486238,
+ "loss": 0.9722,
+ "step": 2284
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016531728429537766,
+ "loss": 0.919,
+ "step": 2285
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016528868779169738,
+ "loss": 1.1242,
+ "step": 2286
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016526008197927436,
+ "loss": 1.1794,
+ "step": 2287
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016523146686218718,
+ "loss": 1.434,
+ "step": 2288
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016520284244451574,
+ "loss": 0.8463,
+ "step": 2289
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016517420873034123,
+ "loss": 1.1736,
+ "step": 2290
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.0001651455657237462,
+ "loss": 1.0431,
+ "step": 2291
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016511691342881453,
+ "loss": 1.2796,
+ "step": 2292
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650882518496314,
+ "loss": 1.0578,
+ "step": 2293
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016505958099028334,
+ "loss": 1.3914,
+ "step": 2294
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650309008548582,
+ "loss": 1.0046,
+ "step": 2295
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650022114474451,
+ "loss": 1.0246,
+ "step": 2296
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016497351277213458,
+ "loss": 1.2789,
+ "step": 2297
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016494480483301836,
+ "loss": 1.0036,
+ "step": 2298
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016491608763418968,
+ "loss": 0.886,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "eval_loss": 2.3017475605010986,
+ "eval_runtime": 283.8846,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648873611797429,
+ "loss": 1.3953,
+ "step": 2300
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648586254737738,
+ "loss": 0.6972,
+ "step": 2301
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016482988052037947,
+ "loss": 1.2311,
+ "step": 2302
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016480112632365833,
+ "loss": 1.327,
+ "step": 2303
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647723628877101,
+ "loss": 0.9534,
+ "step": 2304
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647435902166358,
+ "loss": 0.9164,
+ "step": 2305
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647148083145378,
+ "loss": 1.1038,
+ "step": 2306
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016468601718551976,
+ "loss": 1.0444,
+ "step": 2307
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016465721683368666,
+ "loss": 1.2635,
+ "step": 2308
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016462840726314486,
+ "loss": 1.1647,
+ "step": 2309
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016459958847800187,
+ "loss": 1.3617,
+ "step": 2310
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016457076048236675,
+ "loss": 1.2355,
+ "step": 2311
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016454192328034962,
+ "loss": 0.9989,
+ "step": 2312
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016451307687606213,
+ "loss": 1.1218,
+ "step": 2313
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016448422127361706,
+ "loss": 0.8967,
+ "step": 2314
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644553564771287,
+ "loss": 1.159,
+ "step": 2315
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644264824907124,
+ "loss": 1.5901,
+ "step": 2316
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001643975993184851,
+ "loss": 0.979,
+ "step": 2317
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016436870696456482,
+ "loss": 0.8561,
+ "step": 2318
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016433980543307107,
+ "loss": 0.9485,
+ "step": 2319
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016431089472812444,
+ "loss": 0.7736,
+ "step": 2320
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016428197485384707,
+ "loss": 1.2546,
+ "step": 2321
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016425304581436226,
+ "loss": 0.9534,
+ "step": 2322
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001642241076137947,
+ "loss": 0.8182,
+ "step": 2323
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641951602562703,
+ "loss": 1.1107,
+ "step": 2324
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641662037459164,
+ "loss": 1.0628,
+ "step": 2325
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016413723808686147,
+ "loss": 1.6261,
+ "step": 2326
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001641082632832354,
+ "loss": 1.0286,
+ "step": 2327
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001640792793391694,
+ "loss": 0.5732,
+ "step": 2328
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016405028625879594,
+ "loss": 1.0932,
+ "step": 2329
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016402128404624882,
+ "loss": 1.2585,
+ "step": 2330
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016399227270566308,
+ "loss": 0.8788,
+ "step": 2331
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639632522411751,
+ "loss": 1.1397,
+ "step": 2332
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016393422265692262,
+ "loss": 1.3517,
+ "step": 2333
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639051839570446,
+ "loss": 1.1346,
+ "step": 2334
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016387613614568126,
+ "loss": 0.9594,
+ "step": 2335
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001638470792269743,
+ "loss": 1.0674,
+ "step": 2336
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016381801320506653,
+ "loss": 0.9123,
+ "step": 2337
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016378893808410215,
+ "loss": 1.1909,
+ "step": 2338
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016375985386822664,
+ "loss": 1.0474,
+ "step": 2339
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016373076056158675,
+ "loss": 0.8844,
+ "step": 2340
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001637016581683306,
+ "loss": 1.1606,
+ "step": 2341
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016367254669260749,
+ "loss": 0.6206,
+ "step": 2342
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016364342613856816,
+ "loss": 0.7225,
+ "step": 2343
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016361429651036446,
+ "loss": 1.1782,
+ "step": 2344
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016358515781214977,
+ "loss": 1.0911,
+ "step": 2345
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016355601004807856,
+ "loss": 1.2727,
+ "step": 2346
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016352685322230663,
+ "loss": 0.8294,
+ "step": 2347
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016349768733899117,
+ "loss": 1.1661,
+ "step": 2348
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016346851240229057,
+ "loss": 0.8267,
+ "step": 2349
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016343932841636456,
+ "loss": 1.2873,
+ "step": 2350
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016341013538537412,
+ "loss": 1.2459,
+ "step": 2351
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016338093331348156,
+ "loss": 0.8939,
+ "step": 2352
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016335172220485042,
+ "loss": 1.024,
+ "step": 2353
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.0001633225020636456,
+ "loss": 0.9981,
+ "step": 2354
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016329327289403325,
+ "loss": 1.331,
+ "step": 2355
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016326403470018084,
+ "loss": 0.7446,
+ "step": 2356
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016323478748625703,
+ "loss": 1.1931,
+ "step": 2357
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016320553125643187,
+ "loss": 1.1287,
+ "step": 2358
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016317626601487667,
+ "loss": 1.109,
+ "step": 2359
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016314699176576402,
+ "loss": 0.9946,
+ "step": 2360
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016311770851326778,
+ "loss": 0.8347,
+ "step": 2361
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016308841626156307,
+ "loss": 0.9214,
+ "step": 2362
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.0001630591150148264,
+ "loss": 0.5907,
+ "step": 2363
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016302980477723539,
+ "loss": 1.2412,
+ "step": 2364
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016300048555296915,
+ "loss": 1.2908,
+ "step": 2365
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016297115734620788,
+ "loss": 1.2345,
+ "step": 2366
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016294182016113315,
+ "loss": 1.0418,
+ "step": 2367
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016291247400192785,
+ "loss": 1.1457,
+ "step": 2368
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016288311887277608,
+ "loss": 1.2529,
+ "step": 2369
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016285375477786322,
+ "loss": 1.0013,
+ "step": 2370
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016282438172137597,
+ "loss": 0.943,
+ "step": 2371
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016279499970750226,
+ "loss": 0.7009,
+ "step": 2372
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016276560874043137,
+ "loss": 0.9408,
+ "step": 2373
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627362088243538,
+ "loss": 1.1788,
+ "step": 2374
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627067999634613,
+ "loss": 0.8106,
+ "step": 2375
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016267738216194696,
+ "loss": 1.1695,
+ "step": 2376
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001626479554240051,
+ "loss": 0.9209,
+ "step": 2377
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016261851975383137,
+ "loss": 0.9911,
+ "step": 2378
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016258907515562262,
+ "loss": 1.3819,
+ "step": 2379
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162559621633577,
+ "loss": 0.8926,
+ "step": 2380
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162530159191894,
+ "loss": 1.0896,
+ "step": 2381
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016250068783477424,
+ "loss": 0.8403,
+ "step": 2382
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016247120756641972,
+ "loss": 0.7976,
+ "step": 2383
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001624417183910337,
+ "loss": 0.8881,
+ "step": 2384
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001624122203128207,
+ "loss": 0.8302,
+ "step": 2385
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623827133359865,
+ "loss": 1.3312,
+ "step": 2386
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623531974647381,
+ "loss": 1.003,
+ "step": 2387
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623236727032839,
+ "loss": 0.9487,
+ "step": 2388
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016229413905583342,
+ "loss": 1.2259,
+ "step": 2389
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016226459652659753,
+ "loss": 0.9327,
+ "step": 2390
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016223504511978838,
+ "loss": 0.7336,
+ "step": 2391
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016220548483961934,
+ "loss": 1.0454,
+ "step": 2392
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016217591569030505,
+ "loss": 1.3371,
+ "step": 2393
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016214633767606143,
+ "loss": 1.0814,
+ "step": 2394
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016211675080110566,
+ "loss": 1.2274,
+ "step": 2395
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001620871550696562,
+ "loss": 0.9775,
+ "step": 2396
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016205755048593273,
+ "loss": 1.0323,
+ "step": 2397
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016202793705415622,
+ "loss": 1.5101,
+ "step": 2398
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016199831477854893,
+ "loss": 0.8118,
+ "step": 2399
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001619686836633343,
+ "loss": 1.0233,
+ "step": 2400
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016193904371273715,
+ "loss": 0.9038,
+ "step": 2401
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016190939493098344,
+ "loss": 0.875,
+ "step": 2402
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016187973732230038,
+ "loss": 1.3274,
+ "step": 2403
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016185007089091665,
+ "loss": 1.081,
+ "step": 2404
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016182039564106192,
+ "loss": 1.0841,
+ "step": 2405
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016179071157696728,
+ "loss": 1.3208,
+ "step": 2406
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.000161761018702865,
+ "loss": 1.1854,
+ "step": 2407
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617313170229887,
+ "loss": 1.0651,
+ "step": 2408
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617016065415731,
+ "loss": 1.1398,
+ "step": 2409
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016167188726285434,
+ "loss": 1.2778,
+ "step": 2410
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016164215919106968,
+ "loss": 1.6758,
+ "step": 2411
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.0001616124223304577,
+ "loss": 0.8341,
+ "step": 2412
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016158267668525832,
+ "loss": 0.9513,
+ "step": 2413
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016155292225971253,
+ "loss": 0.9617,
+ "step": 2414
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016152315905806268,
+ "loss": 0.8664,
+ "step": 2415
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016149338708455237,
+ "loss": 1.331,
+ "step": 2416
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016146360634342643,
+ "loss": 1.4212,
+ "step": 2417
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016143381683893094,
+ "loss": 1.2126,
+ "step": 2418
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016140401857531322,
+ "loss": 0.934,
+ "step": 2419
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016137421155682183,
+ "loss": 1.2417,
+ "step": 2420
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001613443957877067,
+ "loss": 1.637,
+ "step": 2421
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016131457127221881,
+ "loss": 1.1456,
+ "step": 2422
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016128473801461053,
+ "loss": 0.9402,
+ "step": 2423
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612548960191354,
+ "loss": 1.3797,
+ "step": 2424
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612250452900483,
+ "loss": 0.8191,
+ "step": 2425
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001611951858316052,
+ "loss": 1.1725,
+ "step": 2426
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016116531764806346,
+ "loss": 1.5701,
+ "step": 2427
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016113544074368164,
+ "loss": 1.0591,
+ "step": 2428
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016110555512271953,
+ "loss": 1.03,
+ "step": 2429
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.0001610756607894382,
+ "loss": 1.1829,
+ "step": 2430
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016104575774809985,
+ "loss": 1.2222,
+ "step": 2431
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016101584600296804,
+ "loss": 1.1537,
+ "step": 2432
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016098592555830753,
+ "loss": 1.0973,
+ "step": 2433
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016095599641838436,
+ "loss": 1.0793,
+ "step": 2434
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016092605858746573,
+ "loss": 1.3484,
+ "step": 2435
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608961120698201,
+ "loss": 1.1689,
+ "step": 2436
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016086615686971726,
+ "loss": 1.0864,
+ "step": 2437
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016083619299142813,
+ "loss": 1.2451,
+ "step": 2438
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608062204392249,
+ "loss": 0.9593,
+ "step": 2439
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016077623921738102,
+ "loss": 0.9816,
+ "step": 2440
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016074624933017112,
+ "loss": 1.0845,
+ "step": 2441
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016071625078187114,
+ "loss": 0.9875,
+ "step": 2442
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001606862435767582,
+ "loss": 0.8758,
+ "step": 2443
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016065622771911067,
+ "loss": 0.9499,
+ "step": 2444
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016062620321320823,
+ "loss": 1.1133,
+ "step": 2445
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605961700633316,
+ "loss": 0.7228,
+ "step": 2446
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016056612827376293,
+ "loss": 1.2297,
+ "step": 2447
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605360778487855,
+ "loss": 1.0251,
+ "step": 2448
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016050601879268386,
+ "loss": 0.8097,
+ "step": 2449
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016047595110974376,
+ "loss": 0.9872,
+ "step": 2450
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001604458748042522,
+ "loss": 1.1119,
+ "step": 2451
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.0001604157898804974,
+ "loss": 0.8256,
+ "step": 2452
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016038569634276882,
+ "loss": 0.9036,
+ "step": 2453
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016035559419535716,
+ "loss": 1.1173,
+ "step": 2454
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016032548344255428,
+ "loss": 1.3173,
+ "step": 2455
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016029536408865337,
+ "loss": 0.717,
+ "step": 2456
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016026523613794878,
+ "loss": 0.9806,
+ "step": 2457
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016023509959473605,
+ "loss": 1.1509,
+ "step": 2458
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016020495446331207,
+ "loss": 1.0454,
+ "step": 2459
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601748007479748,
+ "loss": 1.183,
+ "step": 2460
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601446384530236,
+ "loss": 1.2611,
+ "step": 2461
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016011446758275888,
+ "loss": 1.0377,
+ "step": 2462
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016008428814148236,
+ "loss": 1.2111,
+ "step": 2463
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016005410013349698,
+ "loss": 1.0952,
+ "step": 2464
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016002390356310685,
+ "loss": 0.7589,
+ "step": 2465
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015999369843461742,
+ "loss": 0.8543,
+ "step": 2466
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015996348475233525,
+ "loss": 1.1509,
+ "step": 2467
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001599332625205681,
+ "loss": 1.287,
+ "step": 2468
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015990303174362512,
+ "loss": 1.0401,
+ "step": 2469
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001598727924258164,
+ "loss": 1.0247,
+ "step": 2470
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015984254457145354,
+ "loss": 1.1537,
+ "step": 2471
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015981228818484917,
+ "loss": 0.9606,
+ "step": 2472
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001597820232703172,
+ "loss": 0.8709,
+ "step": 2473
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015975174983217275,
+ "loss": 1.2827,
+ "step": 2474
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015972146787473213,
+ "loss": 0.8057,
+ "step": 2475
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001596911774023129,
+ "loss": 1.0857,
+ "step": 2476
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015966087841923386,
+ "loss": 1.1731,
+ "step": 2477
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001596305709298149,
+ "loss": 0.8871,
+ "step": 2478
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015960025493837727,
+ "loss": 1.0671,
+ "step": 2479
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015956993044924334,
+ "loss": 1.3735,
+ "step": 2480
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015953959746673675,
+ "loss": 1.4655,
+ "step": 2481
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015950925599518228,
+ "loss": 1.3975,
+ "step": 2482
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015947890603890602,
+ "loss": 0.9468,
+ "step": 2483
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001594485476022352,
+ "loss": 0.9976,
+ "step": 2484
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015941818068949818,
+ "loss": 0.6732,
+ "step": 2485
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015938780530502474,
+ "loss": 0.9848,
+ "step": 2486
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015935742145314568,
+ "loss": 1.2441,
+ "step": 2487
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001593270291381931,
+ "loss": 0.9631,
+ "step": 2488
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015929662836450029,
+ "loss": 0.8868,
+ "step": 2489
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592662191364017,
+ "loss": 0.9063,
+ "step": 2490
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015923580145823303,
+ "loss": 0.6886,
+ "step": 2491
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592053753343312,
+ "loss": 1.0702,
+ "step": 2492
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591749407690343,
+ "loss": 1.3879,
+ "step": 2493
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015914449776668167,
+ "loss": 1.1048,
+ "step": 2494
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591140463316137,
+ "loss": 0.9921,
+ "step": 2495
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015908358646817225,
+ "loss": 1.3042,
+ "step": 2496
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015905311818070015,
+ "loss": 0.8413,
+ "step": 2497
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015902264147354153,
+ "loss": 1.5201,
+ "step": 2498
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589921563510417,
+ "loss": 1.0727,
+ "step": 2499
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 8330,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 833,
+ "total_flos": 8.759182245299749e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-2499/trainer_state.json:com.dropbox.attrs b/checkpoint-2499/trainer_state.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..9fd6611e12b41e1b2fd70f17d37311c319364183
Binary files /dev/null and b/checkpoint-2499/trainer_state.json:com.dropbox.attrs differ
diff --git a/checkpoint-2499/training_args.bin b/checkpoint-2499/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207
--- /dev/null
+++ b/checkpoint-2499/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869
+size 4859
diff --git a/checkpoint-2499/training_args.bin:com.dropbox.attrs b/checkpoint-2499/training_args.bin:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..f78d4c43df3553079fd444cc09a35dcab0da0dc0
Binary files /dev/null and b/checkpoint-2499/training_args.bin:com.dropbox.attrs differ
diff --git a/checkpoint-3332/README.md b/checkpoint-3332/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda
--- /dev/null
+++ b/checkpoint-3332/README.md
@@ -0,0 +1,218 @@
+---
+library_name: peft
+base_model: mistralai/Mixtral-8x7B-v0.1
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+## Training procedure
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+### Framework versions
+
+- PEFT 0.7.0
\ No newline at end of file
diff --git a/checkpoint-3332/README.md:com.dropbox.attrs b/checkpoint-3332/README.md:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..d4ea5a4819f5b88611f492ccaa1560c047b15425
Binary files /dev/null and b/checkpoint-3332/README.md:com.dropbox.attrs differ
diff --git a/checkpoint-3332/adapter_config.json b/checkpoint-3332/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a
--- /dev/null
+++ b/checkpoint-3332/adapter_config.json
@@ -0,0 +1,32 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "k_proj",
+ "w1",
+ "gate",
+ "w2",
+ "q_proj",
+ "w3",
+ "o_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-3332/adapter_config.json:com.dropbox.attrs b/checkpoint-3332/adapter_config.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..491ddfd74c4651b57be163d90b5d5352e977ffd2
Binary files /dev/null and b/checkpoint-3332/adapter_config.json:com.dropbox.attrs differ
diff --git a/checkpoint-3332/adapter_model.safetensors b/checkpoint-3332/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e7e28c34c5f984931729b039b7badc160f79d9f5
--- /dev/null
+++ b/checkpoint-3332/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aba448d9e0bde1a7588f5911a70400a25fc5b580ca9a19bdc61064c309d44e2
+size 3875879784
diff --git a/checkpoint-3332/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-3332/adapter_model.safetensors:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..0e564adbb59a865a9bb1c636791481c45b2e6521
Binary files /dev/null and b/checkpoint-3332/adapter_model.safetensors:com.dropbox.attrs differ
diff --git a/checkpoint-3332/optimizer.pt b/checkpoint-3332/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4640a9051bb43e1ff546e09f47249ab93e14bb2
--- /dev/null
+++ b/checkpoint-3332/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:572a2148943aa0b9915e0c66ed54c6810942cd70ca870d8c80bd91a56f3b3062
+size 1943844127
diff --git a/checkpoint-3332/optimizer.pt:com.dropbox.attrs b/checkpoint-3332/optimizer.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..64d60e0cfadd824cf25a2fd9b8b963758c5c8c78
Binary files /dev/null and b/checkpoint-3332/optimizer.pt:com.dropbox.attrs differ
diff --git a/checkpoint-3332/rng_state.pth b/checkpoint-3332/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..01ccf8262ab7e072475fa1f343d8ce0c70048143
--- /dev/null
+++ b/checkpoint-3332/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f56fbf03556b83b300734506a4b44924fcfbbd1dc974be3566502841913603d
+size 14575
diff --git a/checkpoint-3332/rng_state.pth:com.dropbox.attrs b/checkpoint-3332/rng_state.pth:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..93e60fd0b2e9606daa22118664c6d887be4ad152
Binary files /dev/null and b/checkpoint-3332/rng_state.pth:com.dropbox.attrs differ
diff --git a/checkpoint-3332/scheduler.pt b/checkpoint-3332/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3cda8916d448a3fc3573118ac5e6f0861698b1b3
--- /dev/null
+++ b/checkpoint-3332/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45f027f63a5866ebd7ac82e11e9e9f21867f7cc8ddb480ab63bacaf52ce8549a
+size 627
diff --git a/checkpoint-3332/scheduler.pt:com.dropbox.attrs b/checkpoint-3332/scheduler.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..1f55f785aeb799daa383b2a9b6714f842d8fabc6
Binary files /dev/null and b/checkpoint-3332/scheduler.pt:com.dropbox.attrs differ
diff --git a/checkpoint-3332/trainer_state.json b/checkpoint-3332/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..94c9a64b7097278604fe9790073231fc78896c8a
--- /dev/null
+++ b/checkpoint-3332/trainer_state.json
@@ -0,0 +1,20141 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.956782713085234,
+ "eval_steps": 209,
+ "global_step": 3332,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": 2e-05,
+ "loss": 2.1426,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "eval_loss": 2.071432113647461,
+ "eval_runtime": 279.6718,
+ "eval_samples_per_second": 0.737,
+ "eval_steps_per_second": 0.737,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4e-05,
+ "loss": 2.4033,
+ "step": 2
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 6e-05,
+ "loss": 2.1893,
+ "step": 3
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 8e-05,
+ "loss": 2.3226,
+ "step": 4
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0001,
+ "loss": 2.2485,
+ "step": 5
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00012,
+ "loss": 1.9704,
+ "step": 6
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00014,
+ "loss": 1.6929,
+ "step": 7
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00016,
+ "loss": 2.2957,
+ "step": 8
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00018,
+ "loss": 1.9907,
+ "step": 9
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0002,
+ "loss": 2.1295,
+ "step": 10
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999999287109068,
+ "loss": 2.2249,
+ "step": 11
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999997148436365,
+ "loss": 2.1733,
+ "step": 12
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.000199999935839822,
+ "loss": 2.1404,
+ "step": 13
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999988593747084,
+ "loss": 2.0236,
+ "step": 14
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999982177731722,
+ "loss": 1.9639,
+ "step": 15
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999974335937034,
+ "loss": 1.692,
+ "step": 16
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999965068364137,
+ "loss": 2.3609,
+ "step": 17
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999954375014348,
+ "loss": 2.3553,
+ "step": 18
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999942255889198,
+ "loss": 1.5733,
+ "step": 19
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999928710990412,
+ "loss": 1.7505,
+ "step": 20
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999913740319922,
+ "loss": 2.3068,
+ "step": 21
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999897343879862,
+ "loss": 1.8371,
+ "step": 22
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.0001999987952167257,
+ "loss": 1.9852,
+ "step": 23
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999860273700585,
+ "loss": 1.9625,
+ "step": 24
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999839599966655,
+ "loss": 2.1089,
+ "step": 25
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999817500473724,
+ "loss": 2.1086,
+ "step": 26
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999793975224945,
+ "loss": 2.0284,
+ "step": 27
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999769024223673,
+ "loss": 2.3641,
+ "step": 28
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999742647473464,
+ "loss": 1.963,
+ "step": 29
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999714844978078,
+ "loss": 2.0635,
+ "step": 30
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999968561674148,
+ "loss": 1.9304,
+ "step": 31
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999654962767839,
+ "loss": 1.4124,
+ "step": 32
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999622883061518,
+ "loss": 2.1444,
+ "step": 33
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999589377627102,
+ "loss": 1.6477,
+ "step": 34
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999955444646936,
+ "loss": 2.2601,
+ "step": 35
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999518089593282,
+ "loss": 1.6256,
+ "step": 36
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999948030700404,
+ "loss": 1.9155,
+ "step": 37
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999441098707025,
+ "loss": 2.1408,
+ "step": 38
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999400464707832,
+ "loss": 2.104,
+ "step": 39
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.0001999935840501225,
+ "loss": 1.9841,
+ "step": 40
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999314919626272,
+ "loss": 1.5924,
+ "step": 41
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999270008556108,
+ "loss": 1.9956,
+ "step": 42
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999223671808154,
+ "loss": 1.4673,
+ "step": 43
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999175909389018,
+ "loss": 2.1595,
+ "step": 44
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999126721305513,
+ "loss": 1.8439,
+ "step": 45
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999076107564648,
+ "loss": 1.9961,
+ "step": 46
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999024068173638,
+ "loss": 2.1504,
+ "step": 47
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998970603139912,
+ "loss": 2.2907,
+ "step": 48
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999891571247108,
+ "loss": 1.5709,
+ "step": 49
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999885939617498,
+ "loss": 2.4504,
+ "step": 50
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998801654259632,
+ "loss": 2.3787,
+ "step": 51
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999874248673328,
+ "loss": 2.0434,
+ "step": 52
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998681893604347,
+ "loss": 2.1671,
+ "step": 53
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999861987488148,
+ "loss": 1.7432,
+ "step": 54
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998556430573521,
+ "loss": 1.7737,
+ "step": 55
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998491560689513,
+ "loss": 2.0122,
+ "step": 56
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999842526523871,
+ "loss": 1.7545,
+ "step": 57
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998357544230558,
+ "loss": 2.201,
+ "step": 58
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998288397674716,
+ "loss": 2.0396,
+ "step": 59
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999821782558104,
+ "loss": 1.9275,
+ "step": 60
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998145827959598,
+ "loss": 1.7797,
+ "step": 61
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999807240482065,
+ "loss": 2.1463,
+ "step": 62
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997997556174665,
+ "loss": 1.935,
+ "step": 63
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999792128203232,
+ "loss": 2.1182,
+ "step": 64
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999784358240448,
+ "loss": 2.2297,
+ "step": 65
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997764457302234,
+ "loss": 2.1052,
+ "step": 66
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999768390673686,
+ "loss": 2.0777,
+ "step": 67
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997601930719835,
+ "loss": 2.1419,
+ "step": 68
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999751852926286,
+ "loss": 2.2586,
+ "step": 69
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997433702377817,
+ "loss": 1.9089,
+ "step": 70
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997347450076801,
+ "loss": 2.0587,
+ "step": 71
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997259772372116,
+ "loss": 2.4143,
+ "step": 72
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997170669276256,
+ "loss": 1.947,
+ "step": 73
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997080140801932,
+ "loss": 2.008,
+ "step": 74
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996988186962041,
+ "loss": 2.4912,
+ "step": 75
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996894807769707,
+ "loss": 2.0279,
+ "step": 76
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996800003238232,
+ "loss": 1.9914,
+ "step": 77
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.0001999670377338114,
+ "loss": 1.9091,
+ "step": 78
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996606118212148,
+ "loss": 1.8038,
+ "step": 79
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996507037745183,
+ "loss": 2.3573,
+ "step": 80
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996406531994364,
+ "loss": 2.3204,
+ "step": 81
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999630460097403,
+ "loss": 2.1619,
+ "step": 82
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999620124469871,
+ "loss": 1.9977,
+ "step": 83
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996096463183142,
+ "loss": 2.195,
+ "step": 84
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995990256442263,
+ "loss": 1.9909,
+ "step": 85
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995882624491217,
+ "loss": 2.2001,
+ "step": 86
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995773567345354,
+ "loss": 1.5795,
+ "step": 87
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995663085020212,
+ "loss": 2.174,
+ "step": 88
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995551177531557,
+ "loss": 1.9605,
+ "step": 89
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995437844895334,
+ "loss": 2.1768,
+ "step": 90
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999532308712771,
+ "loss": 1.6906,
+ "step": 91
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995206904245037,
+ "loss": 2.1029,
+ "step": 92
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995089296263893,
+ "loss": 2.0652,
+ "step": 93
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019994970263201035,
+ "loss": 2.1733,
+ "step": 94
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999484980507344,
+ "loss": 1.9413,
+ "step": 95
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999472792189828,
+ "loss": 1.9538,
+ "step": 96
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994604613692935,
+ "loss": 2.4158,
+ "step": 97
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994479880474988,
+ "loss": 1.8964,
+ "step": 98
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999435372226222,
+ "loss": 2.3135,
+ "step": 99
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999422613907262,
+ "loss": 2.127,
+ "step": 100
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994097130924374,
+ "loss": 1.9954,
+ "step": 101
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993966697835883,
+ "loss": 2.1363,
+ "step": 102
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993834839825738,
+ "loss": 1.7779,
+ "step": 103
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993701556912742,
+ "loss": 2.0923,
+ "step": 104
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993566849115898,
+ "loss": 1.9183,
+ "step": 105
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993430716454413,
+ "loss": 1.7894,
+ "step": 106
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993293158947694,
+ "loss": 2.0094,
+ "step": 107
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999315417661536,
+ "loss": 2.1469,
+ "step": 108
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999301376947722,
+ "loss": 1.6924,
+ "step": 109
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999287193755329,
+ "loss": 2.1794,
+ "step": 110
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.000199927286808638,
+ "loss": 2.1338,
+ "step": 111
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019992583999429178,
+ "loss": 1.9988,
+ "step": 112
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999243789327004,
+ "loss": 2.0735,
+ "step": 113
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999229036240723,
+ "loss": 2.0521,
+ "step": 114
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019992141406861776,
+ "loss": 1.9441,
+ "step": 115
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991991026654918,
+ "loss": 2.1244,
+ "step": 116
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999183922180809,
+ "loss": 1.7937,
+ "step": 117
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999168599234295,
+ "loss": 2.2603,
+ "step": 118
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991531338281332,
+ "loss": 2.1846,
+ "step": 119
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991375259645293,
+ "loss": 2.3241,
+ "step": 120
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991217756457085,
+ "loss": 2.0926,
+ "step": 121
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991058828739165,
+ "loss": 2.0092,
+ "step": 122
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990898476514193,
+ "loss": 1.8076,
+ "step": 123
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990736699805029,
+ "loss": 2.0369,
+ "step": 124
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990573498634742,
+ "loss": 2.0488,
+ "step": 125
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.000199904088730266,
+ "loss": 2.1534,
+ "step": 126
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990242823004074,
+ "loss": 2.1406,
+ "step": 127
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990075348590839,
+ "loss": 1.9379,
+ "step": 128
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019989906449810775,
+ "loss": 1.9781,
+ "step": 129
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989736126687963,
+ "loss": 1.973,
+ "step": 130
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989564379246683,
+ "loss": 1.6825,
+ "step": 131
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989391207511428,
+ "loss": 2.0843,
+ "step": 132
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989216611506887,
+ "loss": 1.8547,
+ "step": 133
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989040591257952,
+ "loss": 1.7626,
+ "step": 134
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.0001998886314678972,
+ "loss": 2.0531,
+ "step": 135
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988684278127497,
+ "loss": 2.0031,
+ "step": 136
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988503985296773,
+ "loss": 1.9342,
+ "step": 137
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988322268323268,
+ "loss": 2.3297,
+ "step": 138
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988139127232878,
+ "loss": 2.3401,
+ "step": 139
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987954562051725,
+ "loss": 1.8983,
+ "step": 140
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.0001998776857280612,
+ "loss": 2.0621,
+ "step": 141
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987581159522578,
+ "loss": 2.0574,
+ "step": 142
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987392322227824,
+ "loss": 1.9516,
+ "step": 143
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987202060948783,
+ "loss": 2.1402,
+ "step": 144
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987010375712577,
+ "loss": 1.8903,
+ "step": 145
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986817266546539,
+ "loss": 1.8248,
+ "step": 146
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986622733478204,
+ "loss": 1.9877,
+ "step": 147
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986426776535306,
+ "loss": 1.6272,
+ "step": 148
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986229395745785,
+ "loss": 1.8605,
+ "step": 149
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986030591137783,
+ "loss": 1.6848,
+ "step": 150
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985830362739647,
+ "loss": 2.1922,
+ "step": 151
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998562871057992,
+ "loss": 2.0238,
+ "step": 152
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998542563468736,
+ "loss": 2.2246,
+ "step": 153
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985221135090914,
+ "loss": 1.9438,
+ "step": 154
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019985015211819744,
+ "loss": 2.2136,
+ "step": 155
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998480786490321,
+ "loss": 2.4563,
+ "step": 156
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984599094370874,
+ "loss": 2.2138,
+ "step": 157
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984388900252503,
+ "loss": 2.2679,
+ "step": 158
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984177282578064,
+ "loss": 1.9537,
+ "step": 159
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998396424137773,
+ "loss": 2.0803,
+ "step": 160
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998374977668188,
+ "loss": 2.0282,
+ "step": 161
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019983533888521087,
+ "loss": 2.0157,
+ "step": 162
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998331657692613,
+ "loss": 1.7837,
+ "step": 163
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019983097841928,
+ "loss": 2.1556,
+ "step": 164
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982877683557879,
+ "loss": 2.1447,
+ "step": 165
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982656101847162,
+ "loss": 2.4139,
+ "step": 166
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998243309682743,
+ "loss": 1.6788,
+ "step": 167
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982208668530493,
+ "loss": 1.9008,
+ "step": 168
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998198281698834,
+ "loss": 2.173,
+ "step": 169
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019981755542233177,
+ "loss": 2.1837,
+ "step": 170
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981526844297404,
+ "loss": 2.0639,
+ "step": 171
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981296723213632,
+ "loss": 2.3864,
+ "step": 172
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981065179014673,
+ "loss": 1.923,
+ "step": 173
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980832211733535,
+ "loss": 1.9192,
+ "step": 174
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980597821403438,
+ "loss": 2.0335,
+ "step": 175
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.000199803620080578,
+ "loss": 1.8172,
+ "step": 176
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.0001998012477173024,
+ "loss": 2.0294,
+ "step": 177
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979886112454586,
+ "loss": 2.2889,
+ "step": 178
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979646030264867,
+ "loss": 1.8498,
+ "step": 179
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997940452519531,
+ "loss": 2.0797,
+ "step": 180
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997916159728035,
+ "loss": 2.2356,
+ "step": 181
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997891724655462,
+ "loss": 2.1187,
+ "step": 182
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978671473052964,
+ "loss": 1.9301,
+ "step": 183
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978424276810423,
+ "loss": 1.8582,
+ "step": 184
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997817565786224,
+ "loss": 2.144,
+ "step": 185
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977925616243862,
+ "loss": 2.0595,
+ "step": 186
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977674151990945,
+ "loss": 1.9104,
+ "step": 187
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977421265139332,
+ "loss": 1.9727,
+ "step": 188
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977166955725088,
+ "loss": 1.8727,
+ "step": 189
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997691122378447,
+ "loss": 2.0611,
+ "step": 190
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997665406935394,
+ "loss": 2.0745,
+ "step": 191
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997639549247016,
+ "loss": 1.9974,
+ "step": 192
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019976135493169996,
+ "loss": 1.9856,
+ "step": 193
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975874071490526,
+ "loss": 1.778,
+ "step": 194
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975611227469016,
+ "loss": 1.8347,
+ "step": 195
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997534696114294,
+ "loss": 1.5555,
+ "step": 196
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019975081272549989,
+ "loss": 1.5625,
+ "step": 197
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974814161728032,
+ "loss": 1.9997,
+ "step": 198
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974545628715157,
+ "loss": 1.9523,
+ "step": 199
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974275673549654,
+ "loss": 2.1557,
+ "step": 200
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974004296270006,
+ "loss": 1.8306,
+ "step": 201
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973731496914914,
+ "loss": 2.0051,
+ "step": 202
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973457275523264,
+ "loss": 2.201,
+ "step": 203
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997318163213416,
+ "loss": 2.2446,
+ "step": 204
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972904566786903,
+ "loss": 2.1172,
+ "step": 205
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972626079520995,
+ "loss": 1.9849,
+ "step": 206
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972346170376142,
+ "loss": 1.9774,
+ "step": 207
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.0001997206483939225,
+ "loss": 1.7625,
+ "step": 208
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971782086609436,
+ "loss": 2.2346,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 2.00066876411438,
+ "eval_runtime": 282.7648,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971497912068013,
+ "loss": 2.4185,
+ "step": 210
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971212315808497,
+ "loss": 1.946,
+ "step": 211
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019970925297871605,
+ "loss": 2.0049,
+ "step": 212
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970636858298267,
+ "loss": 1.9545,
+ "step": 213
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970346997129598,
+ "loss": 1.9636,
+ "step": 214
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970055714406938,
+ "loss": 1.9068,
+ "step": 215
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969763010171807,
+ "loss": 1.5749,
+ "step": 216
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969468884465942,
+ "loss": 1.7676,
+ "step": 217
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996917333733128,
+ "loss": 2.0329,
+ "step": 218
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996887636880996,
+ "loss": 1.9307,
+ "step": 219
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019968577978944323,
+ "loss": 2.134,
+ "step": 220
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019968278167776908,
+ "loss": 2.0911,
+ "step": 221
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967976935350467,
+ "loss": 2.5057,
+ "step": 222
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.0001996767428170795,
+ "loss": 1.9267,
+ "step": 223
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967370206892503,
+ "loss": 2.3569,
+ "step": 224
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967064710947488,
+ "loss": 1.992,
+ "step": 225
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966757793916454,
+ "loss": 2.01,
+ "step": 226
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966449455843165,
+ "loss": 1.8037,
+ "step": 227
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966139696771587,
+ "loss": 2.2498,
+ "step": 228
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019965828516745876,
+ "loss": 1.6563,
+ "step": 229
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996551591581041,
+ "loss": 1.979,
+ "step": 230
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996520189400975,
+ "loss": 2.1553,
+ "step": 231
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996488645138867,
+ "loss": 1.8743,
+ "step": 232
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964569587992148,
+ "loss": 2.1907,
+ "step": 233
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964251303865362,
+ "loss": 2.0644,
+ "step": 234
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019963931599053692,
+ "loss": 2.1721,
+ "step": 235
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996361047360272,
+ "loss": 2.2267,
+ "step": 236
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996328792755823,
+ "loss": 1.9445,
+ "step": 237
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019962963960966213,
+ "loss": 2.2003,
+ "step": 238
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996263857387286,
+ "loss": 2.3114,
+ "step": 239
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996231176632456,
+ "loss": 1.8553,
+ "step": 240
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961983538367914,
+ "loss": 2.1349,
+ "step": 241
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961653890049715,
+ "loss": 1.8784,
+ "step": 242
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996132282141697,
+ "loss": 2.0118,
+ "step": 243
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960990332516874,
+ "loss": 1.9938,
+ "step": 244
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960656423396834,
+ "loss": 2.2582,
+ "step": 245
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019960321094104465,
+ "loss": 2.1807,
+ "step": 246
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959984344687578,
+ "loss": 1.9084,
+ "step": 247
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959646175194174,
+ "loss": 2.2879,
+ "step": 248
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995930658567248,
+ "loss": 1.942,
+ "step": 249
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958965576170908,
+ "loss": 2.1313,
+ "step": 250
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958623146738088,
+ "loss": 2.3202,
+ "step": 251
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995827929742283,
+ "loss": 1.7832,
+ "step": 252
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957934028274162,
+ "loss": 1.7103,
+ "step": 253
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957587339341321,
+ "loss": 1.9912,
+ "step": 254
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995723923067373,
+ "loss": 1.6686,
+ "step": 255
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956889702321023,
+ "loss": 1.966,
+ "step": 256
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956538754333034,
+ "loss": 2.2287,
+ "step": 257
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956186386759804,
+ "loss": 1.4866,
+ "step": 258
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995583259965157,
+ "loss": 1.9599,
+ "step": 259
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019955477393058773,
+ "loss": 1.9273,
+ "step": 260
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995512076703206,
+ "loss": 1.847,
+ "step": 261
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019954762721622279,
+ "loss": 2.0535,
+ "step": 262
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995440325688048,
+ "loss": 2.4403,
+ "step": 263
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019954042372857908,
+ "loss": 1.8712,
+ "step": 264
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953680069606026,
+ "loss": 2.1837,
+ "step": 265
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953316347176488,
+ "loss": 2.0398,
+ "step": 266
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995295120562115,
+ "loss": 2.1135,
+ "step": 267
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952584644992075,
+ "loss": 2.0358,
+ "step": 268
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952216665341526,
+ "loss": 2.3282,
+ "step": 269
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995184726672197,
+ "loss": 1.9741,
+ "step": 270
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951476449186074,
+ "loss": 1.7523,
+ "step": 271
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951104212786712,
+ "loss": 2.1509,
+ "step": 272
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001995073055757695,
+ "loss": 2.0865,
+ "step": 273
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019950355483610067,
+ "loss": 1.8972,
+ "step": 274
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019949978990939542,
+ "loss": 2.4693,
+ "step": 275
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994960107961905,
+ "loss": 1.9307,
+ "step": 276
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994922174970248,
+ "loss": 2.0097,
+ "step": 277
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994884100124391,
+ "loss": 1.6561,
+ "step": 278
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994845883429763,
+ "loss": 2.3069,
+ "step": 279
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019948075248918124,
+ "loss": 2.0134,
+ "step": 280
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947690245160091,
+ "loss": 2.1061,
+ "step": 281
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947303823078416,
+ "loss": 2.0855,
+ "step": 282
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946915982728197,
+ "loss": 1.5672,
+ "step": 283
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.0001994652672416473,
+ "loss": 1.7289,
+ "step": 284
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946136047443522,
+ "loss": 1.9013,
+ "step": 285
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945743952620268,
+ "loss": 2.3105,
+ "step": 286
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945350439750872,
+ "loss": 2.341,
+ "step": 287
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944955508891443,
+ "loss": 1.88,
+ "step": 288
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994455916009829,
+ "loss": 1.913,
+ "step": 289
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944161393427922,
+ "loss": 1.9513,
+ "step": 290
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943762208937053,
+ "loss": 2.3331,
+ "step": 291
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943361606682597,
+ "loss": 2.3024,
+ "step": 292
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942959586721672,
+ "loss": 2.2222,
+ "step": 293
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942556149111598,
+ "loss": 2.1003,
+ "step": 294
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994215129390989,
+ "loss": 1.9038,
+ "step": 295
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941745021174282,
+ "loss": 1.6068,
+ "step": 296
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941337330962693,
+ "loss": 1.8894,
+ "step": 297
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940928223333252,
+ "loss": 2.3158,
+ "step": 298
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.0001994051769834429,
+ "loss": 2.1015,
+ "step": 299
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940105756054337,
+ "loss": 2.1519,
+ "step": 300
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939692396522127,
+ "loss": 1.7233,
+ "step": 301
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939277619806598,
+ "loss": 1.85,
+ "step": 302
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938861425966887,
+ "loss": 2.2368,
+ "step": 303
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938443815062335,
+ "loss": 1.765,
+ "step": 304
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993802478715248,
+ "loss": 1.6333,
+ "step": 305
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937604342297073,
+ "loss": 2.191,
+ "step": 306
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937182480556055,
+ "loss": 2.2402,
+ "step": 307
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019936759201989577,
+ "loss": 2.0568,
+ "step": 308
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993633450665799,
+ "loss": 2.4314,
+ "step": 309
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935908394621844,
+ "loss": 2.0556,
+ "step": 310
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935480865941894,
+ "loss": 2.0988,
+ "step": 311
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935051920679094,
+ "loss": 2.0964,
+ "step": 312
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019934621558894607,
+ "loss": 1.9365,
+ "step": 313
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993418978064979,
+ "loss": 1.6224,
+ "step": 314
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933756586006202,
+ "loss": 2.144,
+ "step": 315
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933321975025616,
+ "loss": 2.2899,
+ "step": 316
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019932885947769992,
+ "loss": 1.8865,
+ "step": 317
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.000199324485043015,
+ "loss": 2.3996,
+ "step": 318
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993200964468251,
+ "loss": 1.3858,
+ "step": 319
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019931569368975588,
+ "loss": 2.2231,
+ "step": 320
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019931127677243516,
+ "loss": 2.0537,
+ "step": 321
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930684569549264,
+ "loss": 2.1381,
+ "step": 322
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930240045956012,
+ "loss": 2.0152,
+ "step": 323
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992979410652714,
+ "loss": 2.0293,
+ "step": 324
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019929346751326228,
+ "loss": 1.7457,
+ "step": 325
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928897980417057,
+ "loss": 1.987,
+ "step": 326
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928447793863616,
+ "loss": 2.2451,
+ "step": 327
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019927996191730093,
+ "loss": 2.3312,
+ "step": 328
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992754317408087,
+ "loss": 1.8771,
+ "step": 329
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992708874098054,
+ "loss": 1.833,
+ "step": 330
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926632892493896,
+ "loss": 1.9343,
+ "step": 331
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926175628685937,
+ "loss": 2.2328,
+ "step": 332
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992571694962185,
+ "loss": 1.9916,
+ "step": 333
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992525685536704,
+ "loss": 1.9497,
+ "step": 334
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.000199247953459871,
+ "loss": 2.029,
+ "step": 335
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019924332421547835,
+ "loss": 2.0326,
+ "step": 336
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992386808211525,
+ "loss": 2.6406,
+ "step": 337
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019923402327755546,
+ "loss": 2.3811,
+ "step": 338
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922935158535129,
+ "loss": 1.6143,
+ "step": 339
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922466574520608,
+ "loss": 2.2182,
+ "step": 340
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921996575778794,
+ "loss": 2.218,
+ "step": 341
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.000199215251623767,
+ "loss": 1.8615,
+ "step": 342
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921052334381534,
+ "loss": 2.165,
+ "step": 343
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019920578091860716,
+ "loss": 2.1627,
+ "step": 344
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0001992010243488186,
+ "loss": 2.154,
+ "step": 345
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919625363512786,
+ "loss": 1.5966,
+ "step": 346
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919146877821512,
+ "loss": 2.0903,
+ "step": 347
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991866697787626,
+ "loss": 2.2322,
+ "step": 348
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019918185663745456,
+ "loss": 1.9319,
+ "step": 349
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917702935497725,
+ "loss": 2.1367,
+ "step": 350
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917218793201886,
+ "loss": 2.1767,
+ "step": 351
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019916733236926976,
+ "loss": 2.1009,
+ "step": 352
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991624626674222,
+ "loss": 2.1286,
+ "step": 353
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991575788271705,
+ "loss": 2.181,
+ "step": 354
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019915268084921101,
+ "loss": 2.12,
+ "step": 355
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019914776873424206,
+ "loss": 1.9895,
+ "step": 356
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.000199142842482964,
+ "loss": 1.9285,
+ "step": 357
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991379020960792,
+ "loss": 2.2376,
+ "step": 358
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991329475742921,
+ "loss": 2.1274,
+ "step": 359
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912797891830908,
+ "loss": 2.0043,
+ "step": 360
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912299612883852,
+ "loss": 2.022,
+ "step": 361
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019911799920659093,
+ "loss": 1.7343,
+ "step": 362
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991129881522787,
+ "loss": 2.0621,
+ "step": 363
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019910796296661632,
+ "loss": 1.5116,
+ "step": 364
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991029236503203,
+ "loss": 2.0485,
+ "step": 365
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909787020410907,
+ "loss": 1.971,
+ "step": 366
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909280262870324,
+ "loss": 1.9724,
+ "step": 367
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908772092482524,
+ "loss": 1.318,
+ "step": 368
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908262509319964,
+ "loss": 2.0539,
+ "step": 369
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019907751513455302,
+ "loss": 2.1097,
+ "step": 370
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019907239104961392,
+ "loss": 2.0632,
+ "step": 371
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906725283911296,
+ "loss": 2.1897,
+ "step": 372
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906210050378266,
+ "loss": 2.2002,
+ "step": 373
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905693404435773,
+ "loss": 1.9005,
+ "step": 374
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905175346157474,
+ "loss": 1.9873,
+ "step": 375
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904655875617233,
+ "loss": 1.7215,
+ "step": 376
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904134992889113,
+ "loss": 2.0434,
+ "step": 377
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903612698047383,
+ "loss": 2.4223,
+ "step": 378
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903088991166513,
+ "loss": 2.0837,
+ "step": 379
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902563872321172,
+ "loss": 2.2389,
+ "step": 380
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902037341586225,
+ "loss": 1.7205,
+ "step": 381
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001990150939903675,
+ "loss": 1.9577,
+ "step": 382
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019900980044748015,
+ "loss": 1.8778,
+ "step": 383
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.000199004492787955,
+ "loss": 2.2213,
+ "step": 384
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899917101254874,
+ "loss": 2.0927,
+ "step": 385
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899383512202019,
+ "loss": 2.2921,
+ "step": 386
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001989884851171301,
+ "loss": 2.2983,
+ "step": 387
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989831209986413,
+ "loss": 1.8052,
+ "step": 388
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897774276731857,
+ "loss": 1.7741,
+ "step": 389
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897235042392873,
+ "loss": 1.779,
+ "step": 390
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896694396924063,
+ "loss": 1.6924,
+ "step": 391
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896152340402509,
+ "loss": 2.036,
+ "step": 392
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019895608872905494,
+ "loss": 2.04,
+ "step": 393
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989506399451051,
+ "loss": 2.1702,
+ "step": 394
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019894517705295245,
+ "loss": 1.9429,
+ "step": 395
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893970005337584,
+ "loss": 2.0528,
+ "step": 396
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893420894715618,
+ "loss": 1.7906,
+ "step": 397
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989287037350764,
+ "loss": 2.3494,
+ "step": 398
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019892318441792138,
+ "loss": 1.7415,
+ "step": 399
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989176509964781,
+ "loss": 2.0184,
+ "step": 400
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989121034715355,
+ "loss": 1.9277,
+ "step": 401
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989065418438845,
+ "loss": 2.2168,
+ "step": 402
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019890096611431814,
+ "loss": 2.6114,
+ "step": 403
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019889537628363133,
+ "loss": 2.0713,
+ "step": 404
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888977235262104,
+ "loss": 2.2966,
+ "step": 405
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888415432208636,
+ "loss": 2.5206,
+ "step": 406
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887852219282822,
+ "loss": 2.4503,
+ "step": 407
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887287596564966,
+ "loss": 2.102,
+ "step": 408
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886721564135572,
+ "loss": 2.3275,
+ "step": 409
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886154122075343,
+ "loss": 2.0481,
+ "step": 410
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885585270465182,
+ "loss": 1.8395,
+ "step": 411
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885015009386202,
+ "loss": 2.3535,
+ "step": 412
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.000198844433389197,
+ "loss": 2.0147,
+ "step": 413
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988387025914719,
+ "loss": 2.1919,
+ "step": 414
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988329577015038,
+ "loss": 2.156,
+ "step": 415
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882719872011176,
+ "loss": 2.2672,
+ "step": 416
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882142564811694,
+ "loss": 2.3242,
+ "step": 417
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988156384863424,
+ "loss": 2.0259,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "eval_loss": 1.9941134452819824,
+ "eval_runtime": 282.533,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880983723561332,
+ "loss": 1.7039,
+ "step": 419
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880402189675678,
+ "loss": 2.1007,
+ "step": 420
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879819247060193,
+ "loss": 2.2297,
+ "step": 421
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879234895797996,
+ "loss": 1.6166,
+ "step": 422
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.000198786491359724,
+ "loss": 2.408,
+ "step": 423
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019878061967666915,
+ "loss": 1.686,
+ "step": 424
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0001987747339096527,
+ "loss": 2.0492,
+ "step": 425
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876883405951377,
+ "loss": 2.2179,
+ "step": 426
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876292012709356,
+ "loss": 1.8812,
+ "step": 427
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019875699211323528,
+ "loss": 2.2888,
+ "step": 428
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019875105001878409,
+ "loss": 2.0561,
+ "step": 429
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019874509384458725,
+ "loss": 1.9299,
+ "step": 430
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873912359149397,
+ "loss": 2.1999,
+ "step": 431
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873313926035548,
+ "loss": 1.8509,
+ "step": 432
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019872714085202503,
+ "loss": 1.8281,
+ "step": 433
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987211283673578,
+ "loss": 1.8359,
+ "step": 434
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987151018072111,
+ "loss": 2.2844,
+ "step": 435
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870906117244416,
+ "loss": 1.9397,
+ "step": 436
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870300646391824,
+ "loss": 2.302,
+ "step": 437
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869693768249661,
+ "loss": 2.1176,
+ "step": 438
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869085482904458,
+ "loss": 2.1909,
+ "step": 439
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986847579044294,
+ "loss": 2.2382,
+ "step": 440
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867864690952035,
+ "loss": 2.0988,
+ "step": 441
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867252184518878,
+ "loss": 2.2136,
+ "step": 442
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986663827123079,
+ "loss": 1.9324,
+ "step": 443
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019866022951175308,
+ "loss": 2.1274,
+ "step": 444
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019865406224440165,
+ "loss": 1.8625,
+ "step": 445
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019864788091113287,
+ "loss": 2.0009,
+ "step": 446
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0001986416855128281,
+ "loss": 2.2245,
+ "step": 447
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019863547605037063,
+ "loss": 2.0654,
+ "step": 448
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862925252464586,
+ "loss": 1.4339,
+ "step": 449
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862301493654108,
+ "loss": 2.1347,
+ "step": 450
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861676328694562,
+ "loss": 1.7029,
+ "step": 451
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861049757675088,
+ "loss": 2.0081,
+ "step": 452
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019860421780685018,
+ "loss": 1.9994,
+ "step": 453
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985979239781389,
+ "loss": 1.9325,
+ "step": 454
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019859161609151436,
+ "loss": 1.8502,
+ "step": 455
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.000198585294147876,
+ "loss": 2.3779,
+ "step": 456
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019857895814812509,
+ "loss": 2.0303,
+ "step": 457
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985726080931651,
+ "loss": 1.9898,
+ "step": 458
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019856624398390137,
+ "loss": 1.7648,
+ "step": 459
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019855986582124126,
+ "loss": 1.7822,
+ "step": 460
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985534736060942,
+ "loss": 1.9219,
+ "step": 461
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019854706733937155,
+ "loss": 2.1789,
+ "step": 462
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019854064702198675,
+ "loss": 1.9091,
+ "step": 463
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019853421265485514,
+ "loss": 1.9941,
+ "step": 464
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985277642388941,
+ "loss": 1.904,
+ "step": 465
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019852130177502316,
+ "loss": 1.6299,
+ "step": 466
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985148252641636,
+ "loss": 1.7712,
+ "step": 467
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019850833470723886,
+ "loss": 1.6825,
+ "step": 468
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985018301051744,
+ "loss": 1.7408,
+ "step": 469
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019849531145889758,
+ "loss": 2.0622,
+ "step": 470
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019848877876933784,
+ "loss": 1.5699,
+ "step": 471
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0001984822320374266,
+ "loss": 2.0253,
+ "step": 472
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019847567126409724,
+ "loss": 2.2186,
+ "step": 473
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019846909645028523,
+ "loss": 2.0872,
+ "step": 474
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.000198462507596928,
+ "loss": 1.9362,
+ "step": 475
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019845590470496497,
+ "loss": 2.4109,
+ "step": 476
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844928777533753,
+ "loss": 2.2626,
+ "step": 477
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844265680898918,
+ "loss": 2.0874,
+ "step": 478
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984360118068653,
+ "loss": 2.1606,
+ "step": 479
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984293527699133,
+ "loss": 2.063,
+ "step": 480
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019842267969908265,
+ "loss": 1.9065,
+ "step": 481
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984159925953248,
+ "loss": 1.9511,
+ "step": 482
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840929145959317,
+ "loss": 2.056,
+ "step": 483
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840257629284317,
+ "loss": 2.2353,
+ "step": 484
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019839584709603226,
+ "loss": 1.9401,
+ "step": 485
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001983891038701199,
+ "loss": 1.9648,
+ "step": 486
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019838234661606748,
+ "loss": 1.753,
+ "step": 487
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019837557533483846,
+ "loss": 1.7805,
+ "step": 488
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836879002739827,
+ "loss": 2.192,
+ "step": 489
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836199069471437,
+ "loss": 1.9112,
+ "step": 490
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019835517733775615,
+ "loss": 2.0119,
+ "step": 491
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0001983483499574951,
+ "loss": 1.8932,
+ "step": 492
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019834150855490464,
+ "loss": 1.5968,
+ "step": 493
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019833465313096017,
+ "loss": 2.1493,
+ "step": 494
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019832778368663917,
+ "loss": 1.8863,
+ "step": 495
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.000198320900222921,
+ "loss": 2.2134,
+ "step": 496
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019831400274078717,
+ "loss": 2.2831,
+ "step": 497
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019830709124122112,
+ "loss": 2.0266,
+ "step": 498
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001983001657252082,
+ "loss": 2.3392,
+ "step": 499
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019829322619373588,
+ "loss": 1.8426,
+ "step": 500
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019828627264779363,
+ "loss": 2.0742,
+ "step": 501
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001982793050883728,
+ "loss": 1.9578,
+ "step": 502
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019827232351646686,
+ "loss": 2.0863,
+ "step": 503
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982653279330712,
+ "loss": 2.2881,
+ "step": 504
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019825831833918323,
+ "loss": 1.8869,
+ "step": 505
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982512947358024,
+ "loss": 1.8997,
+ "step": 506
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019824425712393012,
+ "loss": 1.8945,
+ "step": 507
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019823720550456977,
+ "loss": 1.9496,
+ "step": 508
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982301398787268,
+ "loss": 2.1066,
+ "step": 509
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019822306024740852,
+ "loss": 1.958,
+ "step": 510
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019821596661162447,
+ "loss": 2.1112,
+ "step": 511
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019820885897238596,
+ "loss": 2.1012,
+ "step": 512
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001982017373307064,
+ "loss": 2.2623,
+ "step": 513
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019819460168760117,
+ "loss": 2.5058,
+ "step": 514
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981874520440877,
+ "loss": 2.1367,
+ "step": 515
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019818028840118532,
+ "loss": 2.2743,
+ "step": 516
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019817311075991543,
+ "loss": 1.5517,
+ "step": 517
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981659191213014,
+ "loss": 1.9569,
+ "step": 518
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815871348636863,
+ "loss": 2.0566,
+ "step": 519
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815149385614444,
+ "loss": 1.8859,
+ "step": 520
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019814426023165825,
+ "loss": 2.0298,
+ "step": 521
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019813701261394136,
+ "loss": 2.0614,
+ "step": 522
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812975100402715,
+ "loss": 2.221,
+ "step": 523
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812247540295096,
+ "loss": 2.1255,
+ "step": 524
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019811518581175014,
+ "loss": 2.1885,
+ "step": 525
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.000198107882231464,
+ "loss": 2.3918,
+ "step": 526
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019810056466313392,
+ "loss": 2.2759,
+ "step": 527
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019809323310780318,
+ "loss": 1.9727,
+ "step": 528
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980858875665171,
+ "loss": 2.0417,
+ "step": 529
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019807852804032305,
+ "loss": 1.645,
+ "step": 530
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980711545302703,
+ "loss": 1.7943,
+ "step": 531
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019806376703741015,
+ "loss": 1.8844,
+ "step": 532
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019805636556279588,
+ "loss": 2.1128,
+ "step": 533
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980489501074828,
+ "loss": 2.0272,
+ "step": 534
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019804152067252816,
+ "loss": 2.0916,
+ "step": 535
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019803407725899131,
+ "loss": 1.7287,
+ "step": 536
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019802661986793342,
+ "loss": 2.0667,
+ "step": 537
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801914850041784,
+ "loss": 2.4016,
+ "step": 538
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801166315750978,
+ "loss": 1.8557,
+ "step": 539
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0001980041638402765,
+ "loss": 1.8072,
+ "step": 540
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019799665054978722,
+ "loss": 2.2252,
+ "step": 541
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798912328711322,
+ "loss": 2.1377,
+ "step": 542
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798158205332764,
+ "loss": 2.0306,
+ "step": 543
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019797402684950576,
+ "loss": 1.7428,
+ "step": 544
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019796645767672477,
+ "loss": 2.0843,
+ "step": 545
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795887453606388,
+ "loss": 1.9175,
+ "step": 546
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795127742860423,
+ "loss": 1.6673,
+ "step": 547
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0001979436663554291,
+ "loss": 1.5553,
+ "step": 548
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019793604131762357,
+ "loss": 1.604,
+ "step": 549
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792840231627482,
+ "loss": 2.023,
+ "step": 550
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792074935247206,
+ "loss": 1.8399,
+ "step": 551
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019791308242730638,
+ "loss": 1.8579,
+ "step": 552
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019790540154187094,
+ "loss": 2.2135,
+ "step": 553
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019789770669726087,
+ "loss": 1.7894,
+ "step": 554
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788999789457326,
+ "loss": 2.1723,
+ "step": 555
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788227513490723,
+ "loss": 2.0881,
+ "step": 556
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019787453841936393,
+ "loss": 1.7181,
+ "step": 557
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019786678774904638,
+ "loss": 1.8725,
+ "step": 558
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785902312505964,
+ "loss": 2.0544,
+ "step": 559
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785124454851084,
+ "loss": 1.7503,
+ "step": 560
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.000197843452020509,
+ "loss": 2.01,
+ "step": 561
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019783564554216518,
+ "loss": 1.748,
+ "step": 562
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978278251145924,
+ "loss": 2.0866,
+ "step": 563
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978199907389057,
+ "loss": 1.6046,
+ "step": 564
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019781214241622208,
+ "loss": 1.9222,
+ "step": 565
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019780428014766051,
+ "loss": 2.2003,
+ "step": 566
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019779640393434206,
+ "loss": 2.0534,
+ "step": 567
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001977885137773896,
+ "loss": 1.8609,
+ "step": 568
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019778060967792817,
+ "loss": 2.0666,
+ "step": 569
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019777269163708468,
+ "loss": 1.9512,
+ "step": 570
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019776475965598814,
+ "loss": 1.8349,
+ "step": 571
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977568137357694,
+ "loss": 2.0507,
+ "step": 572
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019774885387756138,
+ "loss": 1.7588,
+ "step": 573
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.000197740880082499,
+ "loss": 2.0981,
+ "step": 574
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019773289235171918,
+ "loss": 2.0953,
+ "step": 575
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019772489068636077,
+ "loss": 2.0678,
+ "step": 576
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019771687508756466,
+ "loss": 2.0136,
+ "step": 577
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977088455564736,
+ "loss": 1.9781,
+ "step": 578
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019770080209423254,
+ "loss": 2.2185,
+ "step": 579
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019769274470198827,
+ "loss": 1.8076,
+ "step": 580
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019768467338088957,
+ "loss": 1.6888,
+ "step": 581
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019767658813208726,
+ "loss": 2.1273,
+ "step": 582
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0001976684889567341,
+ "loss": 2.3232,
+ "step": 583
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019766037585598487,
+ "loss": 2.366,
+ "step": 584
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019765224883099635,
+ "loss": 1.8939,
+ "step": 585
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019764410788292722,
+ "loss": 2.0162,
+ "step": 586
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019763595301293822,
+ "loss": 2.2752,
+ "step": 587
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976277842221921,
+ "loss": 1.9461,
+ "step": 588
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976196015118535,
+ "loss": 1.9999,
+ "step": 589
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976114048830891,
+ "loss": 2.0169,
+ "step": 590
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019760319433706757,
+ "loss": 2.1838,
+ "step": 591
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019759496987495955,
+ "loss": 2.3513,
+ "step": 592
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975867314979377,
+ "loss": 1.9915,
+ "step": 593
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975784792071766,
+ "loss": 2.1973,
+ "step": 594
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019757021300385286,
+ "loss": 2.3112,
+ "step": 595
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019756193288914507,
+ "loss": 2.0992,
+ "step": 596
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019755363886423376,
+ "loss": 2.4266,
+ "step": 597
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019754533093030148,
+ "loss": 1.7649,
+ "step": 598
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975370090885328,
+ "loss": 1.7573,
+ "step": 599
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019752867334011423,
+ "loss": 1.7949,
+ "step": 600
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975203236862342,
+ "loss": 2.0229,
+ "step": 601
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019751196012808325,
+ "loss": 2.0519,
+ "step": 602
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019750358266685383,
+ "loss": 2.0829,
+ "step": 603
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019749519130374038,
+ "loss": 2.0153,
+ "step": 604
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019748678603993933,
+ "loss": 1.8594,
+ "step": 605
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019747836687664908,
+ "loss": 2.1385,
+ "step": 606
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746993381507003,
+ "loss": 2.1317,
+ "step": 607
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746148685640451,
+ "loss": 1.1676,
+ "step": 608
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974530260018569,
+ "loss": 2.2856,
+ "step": 609
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974445512526336,
+ "loss": 2.1973,
+ "step": 610
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019743606260994278,
+ "loss": 1.6912,
+ "step": 611
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019742756007499486,
+ "loss": 1.8091,
+ "step": 612
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741904364900208,
+ "loss": 2.0108,
+ "step": 613
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741051333317867,
+ "loss": 2.1061,
+ "step": 614
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019740196912874087,
+ "loss": 1.8934,
+ "step": 615
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019739341103690693,
+ "loss": 1.8599,
+ "step": 616
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019738483905889703,
+ "loss": 2.0025,
+ "step": 617
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019737625319593335,
+ "loss": 1.8247,
+ "step": 618
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019736765344924005,
+ "loss": 2.222,
+ "step": 619
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019735903982004324,
+ "loss": 2.116,
+ "step": 620
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973504123095711,
+ "loss": 1.9183,
+ "step": 621
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973417709190536,
+ "loss": 2.1507,
+ "step": 622
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019733311564972296,
+ "loss": 1.7899,
+ "step": 623
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019732444650281315,
+ "loss": 2.1005,
+ "step": 624
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973157634795602,
+ "loss": 2.2391,
+ "step": 625
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019730706658120214,
+ "loss": 1.9466,
+ "step": 626
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.000197298355808979,
+ "loss": 1.9854,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "eval_loss": 1.9957869052886963,
+ "eval_runtime": 282.5544,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019728963116413266,
+ "loss": 2.1877,
+ "step": 628
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019728089264790712,
+ "loss": 2.2194,
+ "step": 629
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019727214026154827,
+ "loss": 1.9631,
+ "step": 630
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019726337400630405,
+ "loss": 2.3506,
+ "step": 631
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019725459388342432,
+ "loss": 2.0543,
+ "step": 632
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972457998941609,
+ "loss": 2.0402,
+ "step": 633
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019723699203976766,
+ "loss": 1.9316,
+ "step": 634
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972281703215004,
+ "loss": 2.2024,
+ "step": 635
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721933474061692,
+ "loss": 1.6776,
+ "step": 636
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721048529837694,
+ "loss": 1.9757,
+ "step": 637
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019720162199604222,
+ "loss": 1.7631,
+ "step": 638
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019719274483487648,
+ "loss": 2.34,
+ "step": 639
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.0001971838538161454,
+ "loss": 1.8469,
+ "step": 640
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019717494894111662,
+ "loss": 2.3151,
+ "step": 641
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019716603021105987,
+ "loss": 2.0661,
+ "step": 642
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019715709762724667,
+ "loss": 2.0408,
+ "step": 643
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019714815119095062,
+ "loss": 1.9848,
+ "step": 644
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019713919090344736,
+ "loss": 2.3134,
+ "step": 645
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019713021676601438,
+ "loss": 2.4947,
+ "step": 646
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001971212287799312,
+ "loss": 2.0515,
+ "step": 647
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019711222694647932,
+ "loss": 2.6216,
+ "step": 648
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019710321126694216,
+ "loss": 1.6517,
+ "step": 649
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001970941817426052,
+ "loss": 2.0408,
+ "step": 650
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019708513837475588,
+ "loss": 1.8841,
+ "step": 651
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019707608116468356,
+ "loss": 2.1966,
+ "step": 652
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019706701011367955,
+ "loss": 1.7587,
+ "step": 653
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970579252230373,
+ "loss": 2.2196,
+ "step": 654
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019704882649405198,
+ "loss": 1.8146,
+ "step": 655
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703971392802098,
+ "loss": 2.2932,
+ "step": 656
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703058752624353,
+ "loss": 1.923,
+ "step": 657
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970214472900208,
+ "loss": 2.2393,
+ "step": 658
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019701229322065605,
+ "loss": 1.7338,
+ "step": 659
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019700312531945442,
+ "loss": 1.7859,
+ "step": 660
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019699394358772306,
+ "loss": 2.2719,
+ "step": 661
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019698474802677107,
+ "loss": 1.576,
+ "step": 662
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019697553863790956,
+ "loss": 2.3333,
+ "step": 663
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019696631542245156,
+ "loss": 2.3508,
+ "step": 664
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019695707838171216,
+ "loss": 2.1876,
+ "step": 665
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019694782751700828,
+ "loss": 1.4863,
+ "step": 666
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019693856282965898,
+ "loss": 1.8948,
+ "step": 667
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019692928432098512,
+ "loss": 1.6867,
+ "step": 668
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691999199230963,
+ "loss": 1.7682,
+ "step": 669
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691068584495742,
+ "loss": 2.0914,
+ "step": 670
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019690136588025535,
+ "loss": 2.1413,
+ "step": 671
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019689203209953223,
+ "loss": 2.1275,
+ "step": 672
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.0001968826845041188,
+ "loss": 1.9556,
+ "step": 673
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019687332309534792,
+ "loss": 2.2209,
+ "step": 674
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019686394787455424,
+ "loss": 1.9853,
+ "step": 675
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019685455884307454,
+ "loss": 2.0877,
+ "step": 676
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019684515600224743,
+ "loss": 2.1607,
+ "step": 677
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019683573935341358,
+ "loss": 2.2664,
+ "step": 678
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019682630889791556,
+ "loss": 1.8527,
+ "step": 679
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.000196816864637098,
+ "loss": 1.8417,
+ "step": 680
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019680740657230738,
+ "loss": 1.9853,
+ "step": 681
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019679793470489228,
+ "loss": 1.8419,
+ "step": 682
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019678844903620317,
+ "loss": 1.9971,
+ "step": 683
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019677894956759246,
+ "loss": 1.9843,
+ "step": 684
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019676943630041462,
+ "loss": 2.376,
+ "step": 685
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675990923602598,
+ "loss": 2.1558,
+ "step": 686
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675036837578494,
+ "loss": 1.5752,
+ "step": 687
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001967408137210518,
+ "loss": 1.6704,
+ "step": 688
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019673124527318881,
+ "loss": 2.1389,
+ "step": 689
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019672166303356028,
+ "loss": 2.126,
+ "step": 690
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019671206700353237,
+ "loss": 1.9402,
+ "step": 691
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019670245718447335,
+ "loss": 1.6701,
+ "step": 692
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019669283357775328,
+ "loss": 1.8134,
+ "step": 693
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001966831961847443,
+ "loss": 2.1642,
+ "step": 694
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019667354500682054,
+ "loss": 1.8455,
+ "step": 695
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.000196663880045358,
+ "loss": 1.9646,
+ "step": 696
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966542013017347,
+ "loss": 1.9855,
+ "step": 697
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019664450877733062,
+ "loss": 1.7029,
+ "step": 698
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019663480247352773,
+ "loss": 1.9789,
+ "step": 699
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966250823917099,
+ "loss": 1.8751,
+ "step": 700
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019661534853326301,
+ "loss": 2.3644,
+ "step": 701
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019660560089957492,
+ "loss": 1.8006,
+ "step": 702
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001965958394920354,
+ "loss": 2.2799,
+ "step": 703
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019658606431203622,
+ "loss": 1.9258,
+ "step": 704
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965762753609711,
+ "loss": 1.9521,
+ "step": 705
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019656647264023575,
+ "loss": 1.9675,
+ "step": 706
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019655665615122783,
+ "loss": 2.3686,
+ "step": 707
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019654682589534693,
+ "loss": 2.1448,
+ "step": 708
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019653698187399466,
+ "loss": 2.2475,
+ "step": 709
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965271240885745,
+ "loss": 1.9417,
+ "step": 710
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965172525404921,
+ "loss": 2.154,
+ "step": 711
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019650736723115475,
+ "loss": 2.0646,
+ "step": 712
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019649746816197196,
+ "loss": 2.235,
+ "step": 713
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019648755533435518,
+ "loss": 1.7122,
+ "step": 714
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019647762874971765,
+ "loss": 2.0635,
+ "step": 715
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019646768840947474,
+ "loss": 1.8904,
+ "step": 716
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019645773431504373,
+ "loss": 1.608,
+ "step": 717
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019644776646784388,
+ "loss": 2.2307,
+ "step": 718
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.0001964377848692963,
+ "loss": 2.176,
+ "step": 719
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019642778952082426,
+ "loss": 2.1984,
+ "step": 720
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001964177804238528,
+ "loss": 2.2625,
+ "step": 721
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019640775757980903,
+ "loss": 2.3142,
+ "step": 722
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019639772099012197,
+ "loss": 2.2366,
+ "step": 723
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019638767065622266,
+ "loss": 1.7823,
+ "step": 724
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.000196377606579544,
+ "loss": 2.0677,
+ "step": 725
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019636752876152095,
+ "loss": 1.3337,
+ "step": 726
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019635743720359037,
+ "loss": 2.055,
+ "step": 727
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001963473319071911,
+ "loss": 1.9888,
+ "step": 728
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019633721287376393,
+ "loss": 1.9258,
+ "step": 729
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019632708010475165,
+ "loss": 2.3768,
+ "step": 730
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001963169336015989,
+ "loss": 1.993,
+ "step": 731
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019630677336575242,
+ "loss": 2.1989,
+ "step": 732
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001962965993986608,
+ "loss": 2.1216,
+ "step": 733
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019628641170177464,
+ "loss": 2.2217,
+ "step": 734
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019627621027654648,
+ "loss": 1.8809,
+ "step": 735
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019626599512443077,
+ "loss": 2.0864,
+ "step": 736
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019625576624688406,
+ "loss": 2.0627,
+ "step": 737
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019624552364536473,
+ "loss": 2.1347,
+ "step": 738
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019623526732133315,
+ "loss": 1.9998,
+ "step": 739
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019622499727625162,
+ "loss": 2.1998,
+ "step": 740
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019621471351158443,
+ "loss": 1.974,
+ "step": 741
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019620441602879787,
+ "loss": 1.9425,
+ "step": 742
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019619410482936008,
+ "loss": 2.6227,
+ "step": 743
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019618377991474124,
+ "loss": 2.1209,
+ "step": 744
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019617344128641345,
+ "loss": 2.0606,
+ "step": 745
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019616308894585078,
+ "loss": 2.296,
+ "step": 746
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019615272289452923,
+ "loss": 2.0415,
+ "step": 747
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961423431339268,
+ "loss": 1.9516,
+ "step": 748
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961319496655234,
+ "loss": 2.0468,
+ "step": 749
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961215424908009,
+ "loss": 1.877,
+ "step": 750
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961111216112432,
+ "loss": 1.8129,
+ "step": 751
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019610068702833596,
+ "loss": 1.9984,
+ "step": 752
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019609023874356707,
+ "loss": 1.9013,
+ "step": 753
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019607977675842615,
+ "loss": 2.0546,
+ "step": 754
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019606930107440485,
+ "loss": 2.2817,
+ "step": 755
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960588116929968,
+ "loss": 2.0578,
+ "step": 756
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019604830861569755,
+ "loss": 2.3521,
+ "step": 757
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019603779184400457,
+ "loss": 2.0392,
+ "step": 758
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960272613794174,
+ "loss": 1.9863,
+ "step": 759
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019601671722343738,
+ "loss": 2.1889,
+ "step": 760
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960061593775679,
+ "loss": 2.0908,
+ "step": 761
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001959955878433143,
+ "loss": 1.986,
+ "step": 762
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019598500262218386,
+ "loss": 2.0339,
+ "step": 763
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019597440371568574,
+ "loss": 2.0958,
+ "step": 764
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.0001959637911253312,
+ "loss": 1.9866,
+ "step": 765
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019595316485263327,
+ "loss": 2.2228,
+ "step": 766
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019594252489910706,
+ "loss": 1.915,
+ "step": 767
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019593187126626965,
+ "loss": 2.0741,
+ "step": 768
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019592120395563994,
+ "loss": 2.5346,
+ "step": 769
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019591052296873888,
+ "loss": 2.4908,
+ "step": 770
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019589982830708937,
+ "loss": 2.1042,
+ "step": 771
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019588911997221625,
+ "loss": 1.8676,
+ "step": 772
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958783979656462,
+ "loss": 1.9152,
+ "step": 773
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019586766228890806,
+ "loss": 1.7784,
+ "step": 774
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958569129435324,
+ "loss": 2.0784,
+ "step": 775
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958461499310519,
+ "loss": 1.7262,
+ "step": 776
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019583537325300118,
+ "loss": 2.4154,
+ "step": 777
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019582458291091663,
+ "loss": 2.3185,
+ "step": 778
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019581377890633684,
+ "loss": 2.0981,
+ "step": 779
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019580296124080212,
+ "loss": 1.8952,
+ "step": 780
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019579212991585493,
+ "loss": 1.7208,
+ "step": 781
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019578128493303955,
+ "loss": 2.0209,
+ "step": 782
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019577042629390217,
+ "loss": 2.1867,
+ "step": 783
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957595539999911,
+ "loss": 2.0805,
+ "step": 784
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019574866805285645,
+ "loss": 2.0451,
+ "step": 785
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019573776845405028,
+ "loss": 2.2056,
+ "step": 786
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957268552051267,
+ "loss": 2.0773,
+ "step": 787
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019571592830764165,
+ "loss": 2.2036,
+ "step": 788
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019570498776315309,
+ "loss": 1.7298,
+ "step": 789
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956940335732209,
+ "loss": 1.8931,
+ "step": 790
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956830657394069,
+ "loss": 2.1567,
+ "step": 791
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019567208426327488,
+ "loss": 1.9471,
+ "step": 792
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019566108914639054,
+ "loss": 1.8916,
+ "step": 793
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019565008039032158,
+ "loss": 2.0111,
+ "step": 794
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019563905799663752,
+ "loss": 2.1374,
+ "step": 795
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019562802196691003,
+ "loss": 2.3083,
+ "step": 796
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019561697230271254,
+ "loss": 2.0381,
+ "step": 797
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001956059090056205,
+ "loss": 2.1909,
+ "step": 798
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019559483207721133,
+ "loss": 1.9893,
+ "step": 799
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955837415190643,
+ "loss": 2.3178,
+ "step": 800
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955726373327607,
+ "loss": 2.0815,
+ "step": 801
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019556151951988376,
+ "loss": 1.6012,
+ "step": 802
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019555038808201865,
+ "loss": 1.4965,
+ "step": 803
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019553924302075242,
+ "loss": 2.3069,
+ "step": 804
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019552808433767415,
+ "loss": 2.2388,
+ "step": 805
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019551691203437482,
+ "loss": 2.5662,
+ "step": 806
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019550572611244738,
+ "loss": 1.9419,
+ "step": 807
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019549452657348663,
+ "loss": 2.3638,
+ "step": 808
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019548331341908947,
+ "loss": 2.1567,
+ "step": 809
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019547208665085457,
+ "loss": 1.9697,
+ "step": 810
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019546084627038268,
+ "loss": 1.9006,
+ "step": 811
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.0001954495922792764,
+ "loss": 2.304,
+ "step": 812
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.0001954383246791403,
+ "loss": 2.0494,
+ "step": 813
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019542704347158093,
+ "loss": 1.8562,
+ "step": 814
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019541574865820672,
+ "loss": 2.1041,
+ "step": 815
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019540444024062804,
+ "loss": 2.22,
+ "step": 816
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019539311822045727,
+ "loss": 1.9925,
+ "step": 817
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019538178259930869,
+ "loss": 2.3213,
+ "step": 818
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019537043337879845,
+ "loss": 2.0319,
+ "step": 819
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019535907056054475,
+ "loss": 1.8578,
+ "step": 820
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019534769414616764,
+ "loss": 1.4115,
+ "step": 821
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953363041372892,
+ "loss": 2.0731,
+ "step": 822
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019532490053553335,
+ "loss": 2.0605,
+ "step": 823
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019531348334252607,
+ "loss": 1.9044,
+ "step": 824
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953020525598951,
+ "loss": 1.7405,
+ "step": 825
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001952906081892703,
+ "loss": 1.898,
+ "step": 826
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019527915023228332,
+ "loss": 1.9696,
+ "step": 827
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019526767869056788,
+ "loss": 2.0469,
+ "step": 828
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019525619356575952,
+ "loss": 2.0307,
+ "step": 829
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019524469485949583,
+ "loss": 2.002,
+ "step": 830
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019523318257341622,
+ "loss": 1.9438,
+ "step": 831
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019522165670916207,
+ "loss": 1.535,
+ "step": 832
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001952101172683768,
+ "loss": 1.7505,
+ "step": 833
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019519856425270562,
+ "loss": 2.2248,
+ "step": 834
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019518699766379576,
+ "loss": 2.0669,
+ "step": 835
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019517541750329635,
+ "loss": 2.0268,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 1.9969017505645752,
+ "eval_runtime": 283.3157,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019516382377285848,
+ "loss": 1.6712,
+ "step": 837
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001951522164741352,
+ "loss": 2.1558,
+ "step": 838
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019514059560878138,
+ "loss": 2.1599,
+ "step": 839
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019512896117845392,
+ "loss": 1.8762,
+ "step": 840
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019511731318481168,
+ "loss": 2.0189,
+ "step": 841
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019510565162951537,
+ "loss": 1.9364,
+ "step": 842
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019509397651422769,
+ "loss": 1.7319,
+ "step": 843
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019508228784061326,
+ "loss": 1.9424,
+ "step": 844
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001950705856103386,
+ "loss": 2.277,
+ "step": 845
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019505886982507225,
+ "loss": 1.6511,
+ "step": 846
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950471404864846,
+ "loss": 1.9056,
+ "step": 847
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019503539759624798,
+ "loss": 1.5105,
+ "step": 848
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950236411560367,
+ "loss": 1.9469,
+ "step": 849
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019501187116752693,
+ "loss": 1.5012,
+ "step": 850
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019500008763239683,
+ "loss": 1.7086,
+ "step": 851
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019498829055232647,
+ "loss": 1.5586,
+ "step": 852
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019497647992899788,
+ "loss": 1.5573,
+ "step": 853
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.000194964655764095,
+ "loss": 2.0757,
+ "step": 854
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019495281805930367,
+ "loss": 1.5478,
+ "step": 855
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019494096681631172,
+ "loss": 1.7068,
+ "step": 856
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019492910203680884,
+ "loss": 1.6759,
+ "step": 857
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001949172237224867,
+ "loss": 1.4621,
+ "step": 858
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019490533187503892,
+ "loss": 1.5359,
+ "step": 859
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.000194893426496161,
+ "loss": 1.9365,
+ "step": 860
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019488150758755035,
+ "loss": 1.7089,
+ "step": 861
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019486957515090641,
+ "loss": 1.4924,
+ "step": 862
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019485762918793046,
+ "loss": 1.387,
+ "step": 863
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.0001948456697003257,
+ "loss": 1.631,
+ "step": 864
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019483369668979732,
+ "loss": 1.7953,
+ "step": 865
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019482171015805245,
+ "loss": 1.7552,
+ "step": 866
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019480971010680002,
+ "loss": 1.8313,
+ "step": 867
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019479769653775106,
+ "loss": 1.593,
+ "step": 868
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019478566945261837,
+ "loss": 1.9506,
+ "step": 869
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019477362885311682,
+ "loss": 1.9598,
+ "step": 870
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947615747409631,
+ "loss": 1.7324,
+ "step": 871
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019474950711787585,
+ "loss": 2.1208,
+ "step": 872
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947374259855757,
+ "loss": 1.4111,
+ "step": 873
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019472533134578507,
+ "loss": 1.6696,
+ "step": 874
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019471322320022849,
+ "loss": 1.6999,
+ "step": 875
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019470110155063225,
+ "loss": 2.1287,
+ "step": 876
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019468896639872468,
+ "loss": 1.874,
+ "step": 877
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019467681774623592,
+ "loss": 1.7149,
+ "step": 878
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019466465559489816,
+ "loss": 1.9563,
+ "step": 879
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019465247994644545,
+ "loss": 1.3504,
+ "step": 880
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019464029080261378,
+ "loss": 1.6176,
+ "step": 881
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019462808816514103,
+ "loss": 1.7577,
+ "step": 882
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019461587203576706,
+ "loss": 1.8054,
+ "step": 883
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019460364241623358,
+ "loss": 2.0246,
+ "step": 884
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019459139930828428,
+ "loss": 1.7645,
+ "step": 885
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945791427136648,
+ "loss": 1.9225,
+ "step": 886
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019456687263412262,
+ "loss": 1.8967,
+ "step": 887
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945545890714072,
+ "loss": 1.5287,
+ "step": 888
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945422920272699,
+ "loss": 1.5033,
+ "step": 889
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019452998150346401,
+ "loss": 2.0148,
+ "step": 890
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945176575017448,
+ "loss": 1.3706,
+ "step": 891
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001945053200238693,
+ "loss": 1.7603,
+ "step": 892
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019449296907159667,
+ "loss": 1.9884,
+ "step": 893
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019448060464668783,
+ "loss": 1.6133,
+ "step": 894
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019446822675090565,
+ "loss": 1.7885,
+ "step": 895
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019445583538601498,
+ "loss": 1.8573,
+ "step": 896
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944434305537826,
+ "loss": 1.7241,
+ "step": 897
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944310122559771,
+ "loss": 1.8942,
+ "step": 898
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944185804943691,
+ "loss": 1.7541,
+ "step": 899
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019440613527073105,
+ "loss": 1.9608,
+ "step": 900
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019439367658683745,
+ "loss": 2.0969,
+ "step": 901
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019438120444446457,
+ "loss": 2.2589,
+ "step": 902
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943687188453907,
+ "loss": 1.7335,
+ "step": 903
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019435621979139596,
+ "loss": 1.8663,
+ "step": 904
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019434370728426252,
+ "loss": 1.5627,
+ "step": 905
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943311813257743,
+ "loss": 1.6101,
+ "step": 906
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019431864191771732,
+ "loss": 1.9661,
+ "step": 907
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943060890618794,
+ "loss": 1.6487,
+ "step": 908
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019429352276005026,
+ "loss": 2.1282,
+ "step": 909
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019428094301402162,
+ "loss": 1.6944,
+ "step": 910
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019426834982558705,
+ "loss": 1.2433,
+ "step": 911
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019425574319654213,
+ "loss": 1.5735,
+ "step": 912
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019424312312868417,
+ "loss": 1.6499,
+ "step": 913
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019423048962381265,
+ "loss": 1.8366,
+ "step": 914
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019421784268372876,
+ "loss": 1.906,
+ "step": 915
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019420518231023568,
+ "loss": 1.5976,
+ "step": 916
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941925085051385,
+ "loss": 1.6722,
+ "step": 917
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019417982127024422,
+ "loss": 1.8832,
+ "step": 918
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019416712060736183,
+ "loss": 1.8865,
+ "step": 919
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019415440651830208,
+ "loss": 1.6627,
+ "step": 920
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941416790048778,
+ "loss": 1.3598,
+ "step": 921
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019412893806890357,
+ "loss": 2.0506,
+ "step": 922
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019411618371219605,
+ "loss": 1.9794,
+ "step": 923
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941034159365737,
+ "loss": 1.7851,
+ "step": 924
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001940906347438569,
+ "loss": 1.8312,
+ "step": 925
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019407784013586804,
+ "loss": 1.5167,
+ "step": 926
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019406503211443128,
+ "loss": 1.5725,
+ "step": 927
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019405221068137277,
+ "loss": 1.8857,
+ "step": 928
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019403937583852061,
+ "loss": 1.741,
+ "step": 929
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019402652758770475,
+ "loss": 1.6748,
+ "step": 930
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019401366593075706,
+ "loss": 1.7285,
+ "step": 931
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019400079086951135,
+ "loss": 1.7545,
+ "step": 932
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019398790240580333,
+ "loss": 1.4491,
+ "step": 933
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019397500054147058,
+ "loss": 1.3359,
+ "step": 934
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019396208527835263,
+ "loss": 1.9567,
+ "step": 935
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.0001939491566182909,
+ "loss": 2.0011,
+ "step": 936
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019393621456312881,
+ "loss": 1.9076,
+ "step": 937
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019392325911471155,
+ "loss": 1.5388,
+ "step": 938
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019391029027488629,
+ "loss": 1.2337,
+ "step": 939
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019389730804550211,
+ "loss": 1.5752,
+ "step": 940
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019388431242840998,
+ "loss": 1.9131,
+ "step": 941
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019387130342546284,
+ "loss": 1.4177,
+ "step": 942
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019385828103851544,
+ "loss": 1.5865,
+ "step": 943
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001938452452694245,
+ "loss": 1.6335,
+ "step": 944
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019383219612004865,
+ "loss": 1.8599,
+ "step": 945
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019381913359224842,
+ "loss": 1.3035,
+ "step": 946
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019380605768788621,
+ "loss": 1.7586,
+ "step": 947
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001937929684088264,
+ "loss": 1.7334,
+ "step": 948
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019377986575693518,
+ "loss": 1.5749,
+ "step": 949
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019376674973408075,
+ "loss": 1.874,
+ "step": 950
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019375362034213314,
+ "loss": 2.3055,
+ "step": 951
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019374047758296433,
+ "loss": 1.5801,
+ "step": 952
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001937273214584482,
+ "loss": 1.8788,
+ "step": 953
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019371415197046052,
+ "loss": 2.431,
+ "step": 954
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019370096912087897,
+ "loss": 1.4963,
+ "step": 955
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001936877729115831,
+ "loss": 1.514,
+ "step": 956
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019367456334445446,
+ "loss": 1.6099,
+ "step": 957
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019366134042137642,
+ "loss": 1.9367,
+ "step": 958
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019364810414423427,
+ "loss": 1.7384,
+ "step": 959
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019363485451491524,
+ "loss": 1.6166,
+ "step": 960
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019362159153530844,
+ "loss": 1.955,
+ "step": 961
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019360831520730482,
+ "loss": 1.4189,
+ "step": 962
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019359502553279736,
+ "loss": 1.4506,
+ "step": 963
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019358172251368087,
+ "loss": 1.7108,
+ "step": 964
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019356840615185203,
+ "loss": 1.6641,
+ "step": 965
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019355507644920952,
+ "loss": 1.7506,
+ "step": 966
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019354173340765382,
+ "loss": 2.0598,
+ "step": 967
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935283770290874,
+ "loss": 1.3494,
+ "step": 968
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019351500731541453,
+ "loss": 1.6571,
+ "step": 969
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935016242685415,
+ "loss": 1.6403,
+ "step": 970
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019348822789037637,
+ "loss": 1.7555,
+ "step": 971
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019347481818282925,
+ "loss": 2.1451,
+ "step": 972
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.000193461395147812,
+ "loss": 1.4522,
+ "step": 973
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934479587872385,
+ "loss": 1.7147,
+ "step": 974
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934345091030245,
+ "loss": 1.3909,
+ "step": 975
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019342104609708756,
+ "loss": 1.8104,
+ "step": 976
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019340756977134728,
+ "loss": 1.5221,
+ "step": 977
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.000193394080127725,
+ "loss": 1.9447,
+ "step": 978
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.0001933805771681442,
+ "loss": 1.5742,
+ "step": 979
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019336706089452996,
+ "loss": 1.5312,
+ "step": 980
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019335353130880948,
+ "loss": 1.4304,
+ "step": 981
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019333998841291177,
+ "loss": 1.8379,
+ "step": 982
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019332643220876773,
+ "loss": 1.877,
+ "step": 983
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001933128626983102,
+ "loss": 1.9627,
+ "step": 984
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932992798834739,
+ "loss": 1.7857,
+ "step": 985
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019328568376619543,
+ "loss": 1.3189,
+ "step": 986
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019327207434841333,
+ "loss": 1.9588,
+ "step": 987
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019325845163206795,
+ "loss": 1.3132,
+ "step": 988
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019324481561910163,
+ "loss": 1.6304,
+ "step": 989
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932311663114586,
+ "loss": 1.8322,
+ "step": 990
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019321750371108486,
+ "loss": 1.4192,
+ "step": 991
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001932038278199285,
+ "loss": 1.3915,
+ "step": 992
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019319013863993933,
+ "loss": 1.8433,
+ "step": 993
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931764361730692,
+ "loss": 2.1459,
+ "step": 994
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931627204212717,
+ "loss": 1.9799,
+ "step": 995
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019314899138650243,
+ "loss": 1.855,
+ "step": 996
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019313524907071887,
+ "loss": 1.4763,
+ "step": 997
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019312149347588037,
+ "loss": 2.0128,
+ "step": 998
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019310772460394814,
+ "loss": 1.6964,
+ "step": 999
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001930939424568854,
+ "loss": 1.5864,
+ "step": 1000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019308014703665712,
+ "loss": 1.8437,
+ "step": 1001
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019306633834523024,
+ "loss": 2.1677,
+ "step": 1002
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019305251638457356,
+ "loss": 1.8872,
+ "step": 1003
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930386811566578,
+ "loss": 1.7312,
+ "step": 1004
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930248326634556,
+ "loss": 1.6772,
+ "step": 1005
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019301097090694143,
+ "loss": 1.9666,
+ "step": 1006
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019299709588909165,
+ "loss": 1.8946,
+ "step": 1007
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019298320761188453,
+ "loss": 2.1784,
+ "step": 1008
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.0001929693060773003,
+ "loss": 2.0249,
+ "step": 1009
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019295539128732093,
+ "loss": 1.717,
+ "step": 1010
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019294146324393046,
+ "loss": 1.8671,
+ "step": 1011
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019292752194911464,
+ "loss": 1.8388,
+ "step": 1012
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019291356740486123,
+ "loss": 1.9111,
+ "step": 1013
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019289959961315986,
+ "loss": 1.5287,
+ "step": 1014
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.000192885618576002,
+ "loss": 1.5669,
+ "step": 1015
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019287162429538105,
+ "loss": 1.9095,
+ "step": 1016
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019285761677329232,
+ "loss": 1.9133,
+ "step": 1017
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019284359601173294,
+ "loss": 2.1099,
+ "step": 1018
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.000192829562012702,
+ "loss": 1.6303,
+ "step": 1019
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019281551477820036,
+ "loss": 1.5907,
+ "step": 1020
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019280145431023097,
+ "loss": 1.4897,
+ "step": 1021
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019278738061079845,
+ "loss": 1.7414,
+ "step": 1022
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019277329368190942,
+ "loss": 1.816,
+ "step": 1023
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019275919352557241,
+ "loss": 1.5033,
+ "step": 1024
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019274508014379777,
+ "loss": 1.7923,
+ "step": 1025
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019273095353859775,
+ "loss": 1.3094,
+ "step": 1026
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019271681371198652,
+ "loss": 1.7689,
+ "step": 1027
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.0001927026606659801,
+ "loss": 1.8019,
+ "step": 1028
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019268849440259639,
+ "loss": 1.8818,
+ "step": 1029
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019267431492385521,
+ "loss": 1.7442,
+ "step": 1030
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019266012223177824,
+ "loss": 2.045,
+ "step": 1031
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019264591632838903,
+ "loss": 1.7842,
+ "step": 1032
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019263169721571308,
+ "loss": 1.5289,
+ "step": 1033
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019261746489577765,
+ "loss": 1.6013,
+ "step": 1034
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019260321937061202,
+ "loss": 1.7912,
+ "step": 1035
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925889606422473,
+ "loss": 1.7573,
+ "step": 1036
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925746887127164,
+ "loss": 1.7368,
+ "step": 1037
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019256040358405424,
+ "loss": 1.7497,
+ "step": 1038
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019254610525829758,
+ "loss": 2.0042,
+ "step": 1039
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019253179373748504,
+ "loss": 2.0732,
+ "step": 1040
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019251746902365708,
+ "loss": 1.8878,
+ "step": 1041
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019250313111885618,
+ "loss": 1.9404,
+ "step": 1042
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019248878002512654,
+ "loss": 1.5535,
+ "step": 1043
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019247441574451432,
+ "loss": 1.9344,
+ "step": 1044
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001924600382790676,
+ "loss": 1.9696,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "eval_loss": 2.064669609069824,
+ "eval_runtime": 283.003,
+ "eval_samples_per_second": 0.728,
+ "eval_steps_per_second": 0.728,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019244564763083624,
+ "loss": 1.4577,
+ "step": 1046
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019243124380187204,
+ "loss": 2.1324,
+ "step": 1047
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019241682679422873,
+ "loss": 1.4713,
+ "step": 1048
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019240239660996177,
+ "loss": 1.7455,
+ "step": 1049
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001923879532511287,
+ "loss": 1.5372,
+ "step": 1050
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019237349671978872,
+ "loss": 2.0984,
+ "step": 1051
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923590270180031,
+ "loss": 1.5023,
+ "step": 1052
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923445441478348,
+ "loss": 2.0826,
+ "step": 1053
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019233004811134886,
+ "loss": 1.7448,
+ "step": 1054
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019231553891061208,
+ "loss": 2.0249,
+ "step": 1055
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019230101654769312,
+ "loss": 1.6144,
+ "step": 1056
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001922864810246626,
+ "loss": 1.9193,
+ "step": 1057
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019227193234359292,
+ "loss": 2.0057,
+ "step": 1058
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019225737050655842,
+ "loss": 1.9493,
+ "step": 1059
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019224279551563532,
+ "loss": 1.9545,
+ "step": 1060
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001922282073729017,
+ "loss": 1.8983,
+ "step": 1061
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019221360608043746,
+ "loss": 1.9414,
+ "step": 1062
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019219899164032447,
+ "loss": 1.8471,
+ "step": 1063
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921843640546464,
+ "loss": 1.7568,
+ "step": 1064
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019216972332548887,
+ "loss": 2.0737,
+ "step": 1065
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921550694549393,
+ "loss": 1.6109,
+ "step": 1066
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.000192140402445087,
+ "loss": 1.6684,
+ "step": 1067
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001921257222980232,
+ "loss": 1.5101,
+ "step": 1068
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019211102901584094,
+ "loss": 1.5262,
+ "step": 1069
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920963226006352,
+ "loss": 1.9757,
+ "step": 1070
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019208160305450272,
+ "loss": 2.038,
+ "step": 1071
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019206687037954224,
+ "loss": 1.4755,
+ "step": 1072
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019205212457785434,
+ "loss": 1.7406,
+ "step": 1073
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019203736565154137,
+ "loss": 1.9564,
+ "step": 1074
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920225936027077,
+ "loss": 1.823,
+ "step": 1075
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001920078084334595,
+ "loss": 1.8275,
+ "step": 1076
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919930101459048,
+ "loss": 1.7106,
+ "step": 1077
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019197819874215347,
+ "loss": 1.5958,
+ "step": 1078
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019196337422431735,
+ "loss": 2.1478,
+ "step": 1079
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919485365945101,
+ "loss": 1.7238,
+ "step": 1080
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019193368585484718,
+ "loss": 2.0758,
+ "step": 1081
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.000191918822007446,
+ "loss": 1.8403,
+ "step": 1082
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019190394505442585,
+ "loss": 1.8286,
+ "step": 1083
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019188905499790789,
+ "loss": 1.6992,
+ "step": 1084
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019187415184001503,
+ "loss": 1.8512,
+ "step": 1085
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918592355828722,
+ "loss": 1.8236,
+ "step": 1086
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918443062286061,
+ "loss": 1.6173,
+ "step": 1087
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019182936377934535,
+ "loss": 1.8593,
+ "step": 1088
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918144082372204,
+ "loss": 1.8184,
+ "step": 1089
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019179943960436358,
+ "loss": 1.9655,
+ "step": 1090
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019178445788290915,
+ "loss": 1.5858,
+ "step": 1091
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019176946307499312,
+ "loss": 1.8359,
+ "step": 1092
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917544551827534,
+ "loss": 1.4354,
+ "step": 1093
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019173943420832984,
+ "loss": 1.4312,
+ "step": 1094
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917244001538641,
+ "loss": 2.0024,
+ "step": 1095
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019170935302149965,
+ "loss": 1.5994,
+ "step": 1096
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019169429281338195,
+ "loss": 2.05,
+ "step": 1097
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019167921953165825,
+ "loss": 1.8746,
+ "step": 1098
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019166413317847763,
+ "loss": 2.0071,
+ "step": 1099
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019164903375599112,
+ "loss": 2.0331,
+ "step": 1100
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019163392126635154,
+ "loss": 1.3587,
+ "step": 1101
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019161879571171362,
+ "loss": 1.6144,
+ "step": 1102
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019160365709423388,
+ "loss": 1.4845,
+ "step": 1103
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019158850541607083,
+ "loss": 1.4511,
+ "step": 1104
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019157334067938474,
+ "loss": 1.8015,
+ "step": 1105
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019155816288633776,
+ "loss": 1.5029,
+ "step": 1106
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019154297203909394,
+ "loss": 1.7102,
+ "step": 1107
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019152776813981912,
+ "loss": 1.6661,
+ "step": 1108
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001915125511906811,
+ "loss": 1.5872,
+ "step": 1109
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019149732119384943,
+ "loss": 1.7868,
+ "step": 1110
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914820781514956,
+ "loss": 1.6365,
+ "step": 1111
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914668220657929,
+ "loss": 2.3434,
+ "step": 1112
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914515529389166,
+ "loss": 1.6458,
+ "step": 1113
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914362707730437,
+ "loss": 1.7061,
+ "step": 1114
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019142097557035308,
+ "loss": 1.8606,
+ "step": 1115
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019140566733302552,
+ "loss": 1.9415,
+ "step": 1116
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019139034606324362,
+ "loss": 1.7411,
+ "step": 1117
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019137501176319193,
+ "loss": 1.9404,
+ "step": 1118
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913596644350567,
+ "loss": 1.802,
+ "step": 1119
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019134430408102615,
+ "loss": 1.2244,
+ "step": 1120
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019132893070329036,
+ "loss": 1.902,
+ "step": 1121
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913135443040412,
+ "loss": 1.4578,
+ "step": 1122
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019129814488547247,
+ "loss": 1.6816,
+ "step": 1123
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001912827324497798,
+ "loss": 1.7293,
+ "step": 1124
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019126730699916061,
+ "loss": 1.6344,
+ "step": 1125
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.0001912518685358143,
+ "loss": 1.6819,
+ "step": 1126
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019123641706194199,
+ "loss": 1.6761,
+ "step": 1127
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019122095257974677,
+ "loss": 1.9222,
+ "step": 1128
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019120547509143354,
+ "loss": 1.6117,
+ "step": 1129
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019118998459920902,
+ "loss": 1.688,
+ "step": 1130
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019117448110528184,
+ "loss": 1.8383,
+ "step": 1131
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019115896461186245,
+ "loss": 1.5225,
+ "step": 1132
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019114343512116318,
+ "loss": 2.0376,
+ "step": 1133
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019112789263539813,
+ "loss": 1.5632,
+ "step": 1134
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019111233715678343,
+ "loss": 1.7049,
+ "step": 1135
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910967686875369,
+ "loss": 1.4992,
+ "step": 1136
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019108118722987826,
+ "loss": 1.7949,
+ "step": 1137
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019106559278602903,
+ "loss": 1.4688,
+ "step": 1138
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019104998535821274,
+ "loss": 1.4031,
+ "step": 1139
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910343649486546,
+ "loss": 2.1757,
+ "step": 1140
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019101873155958179,
+ "loss": 1.622,
+ "step": 1141
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019100308519322322,
+ "loss": 1.9441,
+ "step": 1142
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.0001909874258518098,
+ "loss": 1.8065,
+ "step": 1143
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019097175353757417,
+ "loss": 1.8348,
+ "step": 1144
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019095606825275083,
+ "loss": 2.0519,
+ "step": 1145
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019094036999957624,
+ "loss": 1.9172,
+ "step": 1146
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019092465878028854,
+ "loss": 1.9961,
+ "step": 1147
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019090893459712787,
+ "loss": 2.1239,
+ "step": 1148
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019089319745233611,
+ "loss": 1.3481,
+ "step": 1149
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019087744734815708,
+ "loss": 1.5035,
+ "step": 1150
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019086168428683638,
+ "loss": 1.818,
+ "step": 1151
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019084590827062145,
+ "loss": 2.0481,
+ "step": 1152
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019083011930176165,
+ "loss": 1.4444,
+ "step": 1153
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019081431738250814,
+ "loss": 1.6059,
+ "step": 1154
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907985025151139,
+ "loss": 2.0284,
+ "step": 1155
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907826747018338,
+ "loss": 1.8603,
+ "step": 1156
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019076683394492455,
+ "loss": 1.7189,
+ "step": 1157
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019075098024664468,
+ "loss": 1.7497,
+ "step": 1158
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019073511360925458,
+ "loss": 1.7489,
+ "step": 1159
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001907192340350165,
+ "loss": 1.6059,
+ "step": 1160
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019070334152619453,
+ "loss": 1.4407,
+ "step": 1161
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019068743608505455,
+ "loss": 1.7025,
+ "step": 1162
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019067151771386438,
+ "loss": 1.7921,
+ "step": 1163
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906555864148936,
+ "loss": 1.6147,
+ "step": 1164
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906396421904137,
+ "loss": 1.6192,
+ "step": 1165
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019062368504269795,
+ "loss": 1.4341,
+ "step": 1166
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019060771497402147,
+ "loss": 1.3054,
+ "step": 1167
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001905917319866613,
+ "loss": 2.041,
+ "step": 1168
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019057573608289623,
+ "loss": 2.004,
+ "step": 1169
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019055972726500695,
+ "loss": 1.4002,
+ "step": 1170
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019054370553527595,
+ "loss": 1.5554,
+ "step": 1171
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019052767089598754,
+ "loss": 1.9783,
+ "step": 1172
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.000190511623349428,
+ "loss": 1.7443,
+ "step": 1173
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019049556289788528,
+ "loss": 1.6089,
+ "step": 1174
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001904794895436493,
+ "loss": 1.8784,
+ "step": 1175
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904634032890117,
+ "loss": 2.0985,
+ "step": 1176
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904473041362661,
+ "loss": 1.811,
+ "step": 1177
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019043119208770793,
+ "loss": 1.407,
+ "step": 1178
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904150671456343,
+ "loss": 1.7269,
+ "step": 1179
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019039892931234435,
+ "loss": 1.8374,
+ "step": 1180
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019038277859013896,
+ "loss": 1.583,
+ "step": 1181
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019036661498132086,
+ "loss": 1.6407,
+ "step": 1182
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019035043848819464,
+ "loss": 2.0828,
+ "step": 1183
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019033424911306672,
+ "loss": 1.7067,
+ "step": 1184
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019031804685824534,
+ "loss": 1.55,
+ "step": 1185
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001903018317260406,
+ "loss": 1.7573,
+ "step": 1186
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019028560371876446,
+ "loss": 1.5666,
+ "step": 1187
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001902693628387306,
+ "loss": 1.5192,
+ "step": 1188
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019025310908825466,
+ "loss": 2.0093,
+ "step": 1189
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019023684246965406,
+ "loss": 1.8414,
+ "step": 1190
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019022056298524808,
+ "loss": 1.3696,
+ "step": 1191
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019020427063735782,
+ "loss": 1.6336,
+ "step": 1192
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019018796542830617,
+ "loss": 1.8528,
+ "step": 1193
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019017164736041795,
+ "loss": 2.0523,
+ "step": 1194
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019015531643601973,
+ "loss": 1.7526,
+ "step": 1195
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019013897265743998,
+ "loss": 1.8391,
+ "step": 1196
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019012261602700892,
+ "loss": 1.4257,
+ "step": 1197
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019010624654705867,
+ "loss": 2.0911,
+ "step": 1198
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.0001900898642199232,
+ "loss": 1.7578,
+ "step": 1199
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019007346904793818,
+ "loss": 1.9003,
+ "step": 1200
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900570610334413,
+ "loss": 1.3918,
+ "step": 1201
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900406401787719,
+ "loss": 2.0365,
+ "step": 1202
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019002420648627131,
+ "loss": 1.5184,
+ "step": 1203
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019000775995828254,
+ "loss": 1.6412,
+ "step": 1204
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018999130059715058,
+ "loss": 1.5031,
+ "step": 1205
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018997482840522217,
+ "loss": 1.4421,
+ "step": 1206
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018995834338484584,
+ "loss": 1.9431,
+ "step": 1207
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.000189941845538372,
+ "loss": 1.8141,
+ "step": 1208
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.0001899253348681529,
+ "loss": 1.7289,
+ "step": 1209
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018990881137654258,
+ "loss": 1.7217,
+ "step": 1210
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.000189892275065897,
+ "loss": 2.3727,
+ "step": 1211
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018987572593857381,
+ "loss": 1.4833,
+ "step": 1212
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018985916399693256,
+ "loss": 2.13,
+ "step": 1213
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018984258924333464,
+ "loss": 1.875,
+ "step": 1214
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018982600168014323,
+ "loss": 1.783,
+ "step": 1215
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018980940130972337,
+ "loss": 1.6815,
+ "step": 1216
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897927881344419,
+ "loss": 2.049,
+ "step": 1217
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018977616215666752,
+ "loss": 1.918,
+ "step": 1218
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897595233787707,
+ "loss": 1.5824,
+ "step": 1219
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018974287180312377,
+ "loss": 1.7473,
+ "step": 1220
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018972620743210093,
+ "loss": 1.6915,
+ "step": 1221
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897095302680781,
+ "loss": 1.7633,
+ "step": 1222
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018969284031343308,
+ "loss": 1.6921,
+ "step": 1223
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018967613757054554,
+ "loss": 1.5433,
+ "step": 1224
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018965942204179686,
+ "loss": 1.9389,
+ "step": 1225
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018964269372957038,
+ "loss": 1.5625,
+ "step": 1226
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018962595263625115,
+ "loss": 1.4835,
+ "step": 1227
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018960919876422611,
+ "loss": 1.8479,
+ "step": 1228
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018959243211588397,
+ "loss": 1.7861,
+ "step": 1229
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018957565269361531,
+ "loss": 1.867,
+ "step": 1230
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018955886049981245,
+ "loss": 1.9383,
+ "step": 1231
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.0001895420555368697,
+ "loss": 1.755,
+ "step": 1232
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.000189525237807183,
+ "loss": 1.5166,
+ "step": 1233
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018950840731315024,
+ "loss": 1.8629,
+ "step": 1234
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.000189491564057171,
+ "loss": 1.6845,
+ "step": 1235
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018947470804164685,
+ "loss": 1.4748,
+ "step": 1236
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018945783926898105,
+ "loss": 1.8907,
+ "step": 1237
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018944095774157873,
+ "loss": 1.5758,
+ "step": 1238
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018942406346184683,
+ "loss": 1.6367,
+ "step": 1239
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018940715643219407,
+ "loss": 1.7285,
+ "step": 1240
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018939023665503108,
+ "loss": 1.5714,
+ "step": 1241
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001893733041327702,
+ "loss": 1.9308,
+ "step": 1242
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018935635886782568,
+ "loss": 1.9153,
+ "step": 1243
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018933940086261351,
+ "loss": 1.8009,
+ "step": 1244
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018932243011955154,
+ "loss": 1.7392,
+ "step": 1245
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018930544664105944,
+ "loss": 1.821,
+ "step": 1246
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001892884504295587,
+ "loss": 1.475,
+ "step": 1247
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018927144148747255,
+ "loss": 1.8937,
+ "step": 1248
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018925441981722618,
+ "loss": 1.6958,
+ "step": 1249
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018923738542124644,
+ "loss": 1.6836,
+ "step": 1250
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018922033830196208,
+ "loss": 2.0213,
+ "step": 1251
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018920327846180365,
+ "loss": 1.9572,
+ "step": 1252
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018918620590320352,
+ "loss": 1.9449,
+ "step": 1253
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018916912062859583,
+ "loss": 1.7297,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "eval_loss": 2.0551259517669678,
+ "eval_runtime": 283.8338,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018915202264041664,
+ "loss": 1.8158,
+ "step": 1255
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891349119411037,
+ "loss": 1.921,
+ "step": 1256
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018911778853309658,
+ "loss": 1.5726,
+ "step": 1257
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891006524188368,
+ "loss": 1.6641,
+ "step": 1258
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018908350360076752,
+ "loss": 1.5841,
+ "step": 1259
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018906634208133385,
+ "loss": 1.8567,
+ "step": 1260
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018904916786298257,
+ "loss": 1.5584,
+ "step": 1261
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018903198094816242,
+ "loss": 1.6615,
+ "step": 1262
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018901478133932385,
+ "loss": 1.7477,
+ "step": 1263
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018899756903891914,
+ "loss": 1.3796,
+ "step": 1264
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018898034404940238,
+ "loss": 1.7991,
+ "step": 1265
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018896310637322953,
+ "loss": 1.4944,
+ "step": 1266
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018894585601285827,
+ "loss": 1.5719,
+ "step": 1267
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018892859297074812,
+ "loss": 1.5495,
+ "step": 1268
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018891131724936043,
+ "loss": 1.7611,
+ "step": 1269
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018889402885115833,
+ "loss": 1.5991,
+ "step": 1270
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018887672777860676,
+ "loss": 1.8849,
+ "step": 1271
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888594140341725,
+ "loss": 1.6136,
+ "step": 1272
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888420876203241,
+ "loss": 1.8288,
+ "step": 1273
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888247485395319,
+ "loss": 1.6625,
+ "step": 1274
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018880739679426816,
+ "loss": 1.49,
+ "step": 1275
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018879003238700675,
+ "loss": 1.874,
+ "step": 1276
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018877265532022352,
+ "loss": 1.751,
+ "step": 1277
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018875526559639604,
+ "loss": 1.9882,
+ "step": 1278
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018873786321800374,
+ "loss": 1.5214,
+ "step": 1279
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001887204481875278,
+ "loss": 1.741,
+ "step": 1280
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018870302050745118,
+ "loss": 1.7798,
+ "step": 1281
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018868558018025878,
+ "loss": 1.9258,
+ "step": 1282
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001886681272084371,
+ "loss": 1.9096,
+ "step": 1283
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018865066159447466,
+ "loss": 1.6729,
+ "step": 1284
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018863318334086157,
+ "loss": 1.6239,
+ "step": 1285
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018861569245008994,
+ "loss": 1.9857,
+ "step": 1286
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018859818892465354,
+ "loss": 1.9905,
+ "step": 1287
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188580672767048,
+ "loss": 2.0073,
+ "step": 1288
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018856314397977075,
+ "loss": 1.7109,
+ "step": 1289
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188545602565321,
+ "loss": 1.3727,
+ "step": 1290
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018852804852619975,
+ "loss": 1.7045,
+ "step": 1291
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018851048186490992,
+ "loss": 1.9042,
+ "step": 1292
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018849290258395602,
+ "loss": 1.7174,
+ "step": 1293
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018847531068584452,
+ "loss": 1.6502,
+ "step": 1294
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018845770617308366,
+ "loss": 1.8582,
+ "step": 1295
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001884400890481834,
+ "loss": 1.4846,
+ "step": 1296
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018842245931365562,
+ "loss": 1.5428,
+ "step": 1297
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018840481697201392,
+ "loss": 1.7266,
+ "step": 1298
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001883871620257737,
+ "loss": 1.9324,
+ "step": 1299
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018836949447745215,
+ "loss": 1.577,
+ "step": 1300
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001883518143295683,
+ "loss": 1.6388,
+ "step": 1301
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018833412158464298,
+ "loss": 1.9201,
+ "step": 1302
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018831641624519877,
+ "loss": 1.6478,
+ "step": 1303
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018829869831376005,
+ "loss": 1.6826,
+ "step": 1304
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018828096779285303,
+ "loss": 1.8513,
+ "step": 1305
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018826322468500566,
+ "loss": 1.571,
+ "step": 1306
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018824546899274777,
+ "loss": 1.1602,
+ "step": 1307
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001882277007186109,
+ "loss": 1.9998,
+ "step": 1308
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001882099198651284,
+ "loss": 1.7034,
+ "step": 1309
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001881921264348355,
+ "loss": 1.4031,
+ "step": 1310
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018817432043026911,
+ "loss": 1.8413,
+ "step": 1311
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018815650185396797,
+ "loss": 1.6606,
+ "step": 1312
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018813867070847264,
+ "loss": 1.5792,
+ "step": 1313
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018812082699632546,
+ "loss": 1.4525,
+ "step": 1314
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018810297072007054,
+ "loss": 1.4906,
+ "step": 1315
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018808510188225377,
+ "loss": 1.6284,
+ "step": 1316
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880672204854229,
+ "loss": 1.7281,
+ "step": 1317
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880493265321274,
+ "loss": 1.5345,
+ "step": 1318
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018803142002491856,
+ "loss": 2.0933,
+ "step": 1319
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018801350096634946,
+ "loss": 1.9372,
+ "step": 1320
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.000187995569358975,
+ "loss": 1.7151,
+ "step": 1321
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018797762520535177,
+ "loss": 1.4823,
+ "step": 1322
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001879596685080383,
+ "loss": 2.0495,
+ "step": 1323
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018794169926959474,
+ "loss": 2.2966,
+ "step": 1324
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018792371749258314,
+ "loss": 1.7868,
+ "step": 1325
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018790572317956735,
+ "loss": 1.9403,
+ "step": 1326
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018788771633311292,
+ "loss": 1.6687,
+ "step": 1327
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018786969695578723,
+ "loss": 1.8422,
+ "step": 1328
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018785166505015948,
+ "loss": 1.5916,
+ "step": 1329
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018783362061880062,
+ "loss": 1.9119,
+ "step": 1330
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018781556366428336,
+ "loss": 1.4903,
+ "step": 1331
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018779749418918227,
+ "loss": 1.9497,
+ "step": 1332
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018777941219607364,
+ "loss": 1.9462,
+ "step": 1333
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018776131768753556,
+ "loss": 2.0474,
+ "step": 1334
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018774321066614795,
+ "loss": 1.4474,
+ "step": 1335
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018772509113449245,
+ "loss": 1.8315,
+ "step": 1336
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018770695909515247,
+ "loss": 1.7684,
+ "step": 1337
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018768881455071332,
+ "loss": 1.2675,
+ "step": 1338
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.000187670657503762,
+ "loss": 1.8226,
+ "step": 1339
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018765248795688726,
+ "loss": 2.2112,
+ "step": 1340
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.0001876343059126797,
+ "loss": 1.3627,
+ "step": 1341
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018761611137373173,
+ "loss": 2.1488,
+ "step": 1342
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018759790434263744,
+ "loss": 1.9842,
+ "step": 1343
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018757968482199276,
+ "loss": 1.9775,
+ "step": 1344
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018756145281439545,
+ "loss": 1.6835,
+ "step": 1345
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875432083224449,
+ "loss": 1.5272,
+ "step": 1346
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875249513487425,
+ "loss": 1.7539,
+ "step": 1347
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018750668189589117,
+ "loss": 1.874,
+ "step": 1348
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018748839996649583,
+ "loss": 1.5858,
+ "step": 1349
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018747010556316305,
+ "loss": 1.9298,
+ "step": 1350
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001874517986885012,
+ "loss": 1.5079,
+ "step": 1351
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018743347934512046,
+ "loss": 1.884,
+ "step": 1352
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018741514753563277,
+ "loss": 1.7978,
+ "step": 1353
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873968032626518,
+ "loss": 1.7735,
+ "step": 1354
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018737844652879312,
+ "loss": 1.7227,
+ "step": 1355
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018736007733667393,
+ "loss": 1.8458,
+ "step": 1356
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018734169568891334,
+ "loss": 1.3268,
+ "step": 1357
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873233015881321,
+ "loss": 1.3782,
+ "step": 1358
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018730489503695287,
+ "loss": 1.9614,
+ "step": 1359
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018728647603800003,
+ "loss": 1.7755,
+ "step": 1360
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018726804459389963,
+ "loss": 1.7961,
+ "step": 1361
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018724960070727972,
+ "loss": 1.7158,
+ "step": 1362
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872311443807699,
+ "loss": 1.6303,
+ "step": 1363
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872126756170017,
+ "loss": 1.8734,
+ "step": 1364
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018719419441860834,
+ "loss": 1.5143,
+ "step": 1365
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001871757007882248,
+ "loss": 1.498,
+ "step": 1366
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001871571947284879,
+ "loss": 1.0886,
+ "step": 1367
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018713867624203621,
+ "loss": 1.6633,
+ "step": 1368
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018712014533151008,
+ "loss": 1.8895,
+ "step": 1369
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018710160199955156,
+ "loss": 1.4178,
+ "step": 1370
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018708304624880456,
+ "loss": 1.6814,
+ "step": 1371
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001870644780819147,
+ "loss": 1.8671,
+ "step": 1372
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018704589750152944,
+ "loss": 1.4786,
+ "step": 1373
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018702730451029796,
+ "loss": 1.8622,
+ "step": 1374
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018700869911087115,
+ "loss": 1.8891,
+ "step": 1375
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869900813059018,
+ "loss": 2.0493,
+ "step": 1376
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018697145109804436,
+ "loss": 1.7238,
+ "step": 1377
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018695280848995513,
+ "loss": 1.7826,
+ "step": 1378
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869341534842921,
+ "loss": 1.8557,
+ "step": 1379
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869154860837151,
+ "loss": 1.7492,
+ "step": 1380
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868968062908857,
+ "loss": 1.7441,
+ "step": 1381
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868781141084672,
+ "loss": 1.8322,
+ "step": 1382
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868594095391247,
+ "loss": 1.8177,
+ "step": 1383
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018684069258552508,
+ "loss": 2.0001,
+ "step": 1384
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018682196325033696,
+ "loss": 1.5046,
+ "step": 1385
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018680322153623075,
+ "loss": 1.6789,
+ "step": 1386
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867844674458786,
+ "loss": 1.6951,
+ "step": 1387
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018676570098195443,
+ "loss": 2.0334,
+ "step": 1388
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018674692214713388,
+ "loss": 1.7833,
+ "step": 1389
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867281309440945,
+ "loss": 1.82,
+ "step": 1390
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018670932737551547,
+ "loss": 1.8155,
+ "step": 1391
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018669051144407775,
+ "loss": 1.7912,
+ "step": 1392
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018667168315246406,
+ "loss": 1.5816,
+ "step": 1393
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018665284250335895,
+ "loss": 1.7521,
+ "step": 1394
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018663398949944865,
+ "loss": 1.4287,
+ "step": 1395
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018661512414342127,
+ "loss": 1.6026,
+ "step": 1396
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018659624643796647,
+ "loss": 1.6953,
+ "step": 1397
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018657735638577587,
+ "loss": 1.8515,
+ "step": 1398
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018655845398954276,
+ "loss": 2.0384,
+ "step": 1399
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018653953925196225,
+ "loss": 1.5458,
+ "step": 1400
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018652061217573114,
+ "loss": 1.7166,
+ "step": 1401
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.000186501672763548,
+ "loss": 1.5653,
+ "step": 1402
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018648272101811318,
+ "loss": 2.0928,
+ "step": 1403
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018646375694212884,
+ "loss": 1.605,
+ "step": 1404
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018644478053829878,
+ "loss": 1.4734,
+ "step": 1405
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018642579180932865,
+ "loss": 2.0578,
+ "step": 1406
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018640679075792582,
+ "loss": 1.9823,
+ "step": 1407
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018638777738679943,
+ "loss": 2.0551,
+ "step": 1408
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018636875169866036,
+ "loss": 1.6315,
+ "step": 1409
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863497136962213,
+ "loss": 1.8965,
+ "step": 1410
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863306633821966,
+ "loss": 1.3584,
+ "step": 1411
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018631160075930245,
+ "loss": 1.9673,
+ "step": 1412
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018629252583025676,
+ "loss": 1.5277,
+ "step": 1413
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001862734385977792,
+ "loss": 1.6788,
+ "step": 1414
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018625433906459116,
+ "loss": 1.432,
+ "step": 1415
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018623522723341588,
+ "loss": 1.8102,
+ "step": 1416
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018621610310697823,
+ "loss": 1.6713,
+ "step": 1417
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018619696668800492,
+ "loss": 1.6989,
+ "step": 1418
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001861778179792244,
+ "loss": 1.7645,
+ "step": 1419
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018615865698336684,
+ "loss": 1.594,
+ "step": 1420
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018613948370316415,
+ "loss": 1.8751,
+ "step": 1421
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018612029814135014,
+ "loss": 1.64,
+ "step": 1422
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018610110030066007,
+ "loss": 1.5066,
+ "step": 1423
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001860818901838313,
+ "loss": 1.9817,
+ "step": 1424
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018606266779360266,
+ "loss": 2.056,
+ "step": 1425
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860434331327149,
+ "loss": 1.6997,
+ "step": 1426
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018602418620391044,
+ "loss": 1.5573,
+ "step": 1427
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860049270099335,
+ "loss": 1.8427,
+ "step": 1428
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018598565555353,
+ "loss": 2.012,
+ "step": 1429
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018596637183744763,
+ "loss": 1.7976,
+ "step": 1430
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018594707586443585,
+ "loss": 1.4,
+ "step": 1431
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001859277676372458,
+ "loss": 1.8717,
+ "step": 1432
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018590844715863045,
+ "loss": 1.4311,
+ "step": 1433
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018588911443134448,
+ "loss": 1.5903,
+ "step": 1434
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018586976945814425,
+ "loss": 2.0898,
+ "step": 1435
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018585041224178803,
+ "loss": 1.5302,
+ "step": 1436
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018583104278503568,
+ "loss": 1.9582,
+ "step": 1437
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018581166109064886,
+ "loss": 1.5264,
+ "step": 1438
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018579226716139096,
+ "loss": 1.6551,
+ "step": 1439
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018577286100002723,
+ "loss": 1.7774,
+ "step": 1440
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018575344260932444,
+ "loss": 1.8316,
+ "step": 1441
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001857340119920513,
+ "loss": 1.3916,
+ "step": 1442
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018571456915097818,
+ "loss": 1.6728,
+ "step": 1443
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856951140888772,
+ "loss": 1.7247,
+ "step": 1444
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018567564680852224,
+ "loss": 1.4539,
+ "step": 1445
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018565616731268888,
+ "loss": 1.613,
+ "step": 1446
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856366756041545,
+ "loss": 1.757,
+ "step": 1447
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018561717168569816,
+ "loss": 1.6903,
+ "step": 1448
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018559765556010072,
+ "loss": 1.7322,
+ "step": 1449
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018557812723014476,
+ "loss": 1.5627,
+ "step": 1450
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018555858669861458,
+ "loss": 1.8751,
+ "step": 1451
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018553903396829625,
+ "loss": 1.2721,
+ "step": 1452
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018551946904197752,
+ "loss": 1.8167,
+ "step": 1453
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018549989192244797,
+ "loss": 1.6602,
+ "step": 1454
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018548030261249885,
+ "loss": 1.9053,
+ "step": 1455
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018546070111492315,
+ "loss": 1.7721,
+ "step": 1456
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018544108743251566,
+ "loss": 2.1421,
+ "step": 1457
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018542146156807284,
+ "loss": 1.5076,
+ "step": 1458
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018540182352439288,
+ "loss": 1.9039,
+ "step": 1459
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018538217330427582,
+ "loss": 1.9777,
+ "step": 1460
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018536251091052323,
+ "loss": 1.5702,
+ "step": 1461
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018534283634593862,
+ "loss": 1.851,
+ "step": 1462
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018532314961332717,
+ "loss": 1.5337,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "eval_loss": 2.068387508392334,
+ "eval_runtime": 283.4638,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018530345071549574,
+ "loss": 1.7553,
+ "step": 1464
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018528373965525296,
+ "loss": 1.4175,
+ "step": 1465
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018526401643540922,
+ "loss": 1.7216,
+ "step": 1466
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018524428105877664,
+ "loss": 1.6415,
+ "step": 1467
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018522453352816896,
+ "loss": 1.7284,
+ "step": 1468
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018520477384640187,
+ "loss": 1.8314,
+ "step": 1469
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018518500201629258,
+ "loss": 1.8341,
+ "step": 1470
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018516521804066015,
+ "loss": 1.4129,
+ "step": 1471
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018514542192232537,
+ "loss": 1.4671,
+ "step": 1472
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018512561366411067,
+ "loss": 1.6665,
+ "step": 1473
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018510579326884034,
+ "loss": 1.5722,
+ "step": 1474
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850859607393403,
+ "loss": 1.9348,
+ "step": 1475
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850661160784383,
+ "loss": 1.5404,
+ "step": 1476
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018504625928896363,
+ "loss": 1.4769,
+ "step": 1477
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018502639037374757,
+ "loss": 1.4149,
+ "step": 1478
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850065093356229,
+ "loss": 1.958,
+ "step": 1479
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018498661617742426,
+ "loss": 1.8319,
+ "step": 1480
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018496671090198797,
+ "loss": 1.5948,
+ "step": 1481
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001849467935121521,
+ "loss": 1.8469,
+ "step": 1482
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018492686401075644,
+ "loss": 1.6798,
+ "step": 1483
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001849069224006425,
+ "loss": 1.8197,
+ "step": 1484
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848869686846535,
+ "loss": 1.6613,
+ "step": 1485
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848670028656344,
+ "loss": 1.7322,
+ "step": 1486
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018484702494643188,
+ "loss": 2.0493,
+ "step": 1487
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018482703492989444,
+ "loss": 1.7182,
+ "step": 1488
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018480703281887215,
+ "loss": 1.689,
+ "step": 1489
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018478701861621686,
+ "loss": 1.9477,
+ "step": 1490
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001847669923247822,
+ "loss": 1.8171,
+ "step": 1491
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018474695394742345,
+ "loss": 1.7337,
+ "step": 1492
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847269034869977,
+ "loss": 1.6983,
+ "step": 1493
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847068409463636,
+ "loss": 1.6445,
+ "step": 1494
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846867663283818,
+ "loss": 1.9965,
+ "step": 1495
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846666796359143,
+ "loss": 1.6775,
+ "step": 1496
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846465808718252,
+ "loss": 1.8117,
+ "step": 1497
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018462647003898006,
+ "loss": 1.8803,
+ "step": 1498
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018460634714024624,
+ "loss": 1.3045,
+ "step": 1499
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018458621217849286,
+ "loss": 1.7768,
+ "step": 1500
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018456606515659073,
+ "loss": 2.0641,
+ "step": 1501
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001845459060774123,
+ "loss": 1.3804,
+ "step": 1502
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018452573494383192,
+ "loss": 1.6271,
+ "step": 1503
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018450555175872547,
+ "loss": 1.8525,
+ "step": 1504
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018448535652497073,
+ "loss": 1.5303,
+ "step": 1505
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.000184465149245447,
+ "loss": 2.0368,
+ "step": 1506
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018444492992303544,
+ "loss": 1.9951,
+ "step": 1507
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001844246985606189,
+ "loss": 1.8715,
+ "step": 1508
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018440445516108186,
+ "loss": 1.7373,
+ "step": 1509
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018438419972731067,
+ "loss": 1.7667,
+ "step": 1510
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018436393226219327,
+ "loss": 1.5134,
+ "step": 1511
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018434365276861938,
+ "loss": 1.3891,
+ "step": 1512
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001843233612494804,
+ "loss": 1.7066,
+ "step": 1513
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018430305770766948,
+ "loss": 1.6366,
+ "step": 1514
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842827421460814,
+ "loss": 1.7838,
+ "step": 1515
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842624145676128,
+ "loss": 1.7884,
+ "step": 1516
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001842420749751619,
+ "loss": 1.8428,
+ "step": 1517
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018422172337162867,
+ "loss": 1.4987,
+ "step": 1518
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018420135975991483,
+ "loss": 1.7576,
+ "step": 1519
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001841809841429238,
+ "loss": 1.8522,
+ "step": 1520
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018416059652356066,
+ "loss": 1.9308,
+ "step": 1521
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018414019690473227,
+ "loss": 1.4658,
+ "step": 1522
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018411978528934717,
+ "loss": 1.7072,
+ "step": 1523
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001840993616803156,
+ "loss": 1.736,
+ "step": 1524
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840789260805495,
+ "loss": 1.7712,
+ "step": 1525
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840584784929626,
+ "loss": 1.2231,
+ "step": 1526
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018403801892047023,
+ "loss": 1.8421,
+ "step": 1527
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018401754736598947,
+ "loss": 1.2689,
+ "step": 1528
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018399706383243918,
+ "loss": 1.8062,
+ "step": 1529
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839765683227398,
+ "loss": 1.6846,
+ "step": 1530
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839560608398136,
+ "loss": 1.8201,
+ "step": 1531
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018393554138658441,
+ "loss": 1.6958,
+ "step": 1532
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018391500996597796,
+ "loss": 1.8487,
+ "step": 1533
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.0001838944665809215,
+ "loss": 1.9788,
+ "step": 1534
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018387391123434412,
+ "loss": 1.6002,
+ "step": 1535
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018385334392917658,
+ "loss": 1.3859,
+ "step": 1536
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018383276466835127,
+ "loss": 2.0743,
+ "step": 1537
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018381217345480235,
+ "loss": 1.8357,
+ "step": 1538
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018379157029146573,
+ "loss": 1.7002,
+ "step": 1539
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018377095518127897,
+ "loss": 1.3058,
+ "step": 1540
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018375032812718124,
+ "loss": 1.8745,
+ "step": 1541
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018372968913211364,
+ "loss": 1.7847,
+ "step": 1542
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018370903819901874,
+ "loss": 1.8156,
+ "step": 1543
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018368837533084095,
+ "loss": 2.0152,
+ "step": 1544
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018366770053052634,
+ "loss": 1.5656,
+ "step": 1545
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018364701380102266,
+ "loss": 1.5753,
+ "step": 1546
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018362631514527947,
+ "loss": 1.3938,
+ "step": 1547
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018360560456624788,
+ "loss": 1.9599,
+ "step": 1548
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018358488206688075,
+ "loss": 1.8641,
+ "step": 1549
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018356414765013267,
+ "loss": 1.8428,
+ "step": 1550
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018354340131895998,
+ "loss": 1.6016,
+ "step": 1551
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018352264307632056,
+ "loss": 1.5768,
+ "step": 1552
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018350187292517415,
+ "loss": 1.5369,
+ "step": 1553
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.0001834810908684821,
+ "loss": 1.9717,
+ "step": 1554
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018346029690920746,
+ "loss": 1.943,
+ "step": 1555
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018343949105031505,
+ "loss": 1.8166,
+ "step": 1556
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018341867329477125,
+ "loss": 1.7149,
+ "step": 1557
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018339784364554426,
+ "loss": 1.4657,
+ "step": 1558
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018337700210560397,
+ "loss": 1.8693,
+ "step": 1559
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018335614867792183,
+ "loss": 1.7656,
+ "step": 1560
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001833352833654712,
+ "loss": 1.5123,
+ "step": 1561
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018331440617122696,
+ "loss": 1.7884,
+ "step": 1562
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832935170981657,
+ "loss": 1.7309,
+ "step": 1563
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018327261614926583,
+ "loss": 1.9628,
+ "step": 1564
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018325170332750732,
+ "loss": 1.6409,
+ "step": 1565
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832307786358719,
+ "loss": 1.6093,
+ "step": 1566
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018320984207734298,
+ "loss": 1.6111,
+ "step": 1567
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018318889365490565,
+ "loss": 2.0085,
+ "step": 1568
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018316793337154664,
+ "loss": 2.079,
+ "step": 1569
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018314696123025454,
+ "loss": 1.5466,
+ "step": 1570
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018312597723401942,
+ "loss": 2.0825,
+ "step": 1571
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.0001831049813858332,
+ "loss": 1.9748,
+ "step": 1572
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018308397368868945,
+ "loss": 1.6529,
+ "step": 1573
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018306295414558335,
+ "loss": 1.7119,
+ "step": 1574
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018304192275951184,
+ "loss": 1.8812,
+ "step": 1575
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018302087953347352,
+ "loss": 1.8676,
+ "step": 1576
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018299982447046877,
+ "loss": 1.879,
+ "step": 1577
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018297875757349952,
+ "loss": 1.6282,
+ "step": 1578
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018295767884556947,
+ "loss": 1.735,
+ "step": 1579
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018293658828968397,
+ "loss": 1.5796,
+ "step": 1580
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018291548590885007,
+ "loss": 1.8258,
+ "step": 1581
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018289437170607658,
+ "loss": 1.7531,
+ "step": 1582
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018287324568437381,
+ "loss": 1.6265,
+ "step": 1583
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018285210784675394,
+ "loss": 1.7997,
+ "step": 1584
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018283095819623078,
+ "loss": 1.955,
+ "step": 1585
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018280979673581977,
+ "loss": 1.6542,
+ "step": 1586
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018278862346853808,
+ "loss": 1.7634,
+ "step": 1587
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018276743839740458,
+ "loss": 2.0077,
+ "step": 1588
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018274624152543977,
+ "loss": 2.0254,
+ "step": 1589
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018272503285566587,
+ "loss": 1.4464,
+ "step": 1590
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018270381239110677,
+ "loss": 1.8643,
+ "step": 1591
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018268258013478804,
+ "loss": 1.3278,
+ "step": 1592
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018266133608973696,
+ "loss": 1.744,
+ "step": 1593
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018264008025898248,
+ "loss": 1.5079,
+ "step": 1594
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018261881264555516,
+ "loss": 1.9655,
+ "step": 1595
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.0001825975332524873,
+ "loss": 2.0557,
+ "step": 1596
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.000182576242082813,
+ "loss": 1.7174,
+ "step": 1597
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018255493913956774,
+ "loss": 1.449,
+ "step": 1598
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018253362442578896,
+ "loss": 1.9058,
+ "step": 1599
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018251229794451567,
+ "loss": 1.3482,
+ "step": 1600
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018249095969878853,
+ "loss": 1.7906,
+ "step": 1601
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018246960969164994,
+ "loss": 1.6177,
+ "step": 1602
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018244824792614393,
+ "loss": 1.5786,
+ "step": 1603
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018242687440531618,
+ "loss": 1.6451,
+ "step": 1604
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018240548913221416,
+ "loss": 1.3695,
+ "step": 1605
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.0001823840921098869,
+ "loss": 1.6648,
+ "step": 1606
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018236268334138515,
+ "loss": 2.1548,
+ "step": 1607
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018234126282976133,
+ "loss": 1.6153,
+ "step": 1608
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001823198305780696,
+ "loss": 1.741,
+ "step": 1609
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018229838658936564,
+ "loss": 1.7827,
+ "step": 1610
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018227693086670697,
+ "loss": 1.7343,
+ "step": 1611
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018225546341315261,
+ "loss": 1.8149,
+ "step": 1612
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001822339842317635,
+ "loss": 1.5497,
+ "step": 1613
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018221249332560198,
+ "loss": 1.7659,
+ "step": 1614
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001821909906977322,
+ "loss": 1.8992,
+ "step": 1615
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018216947635122,
+ "loss": 1.8682,
+ "step": 1616
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018214795028913288,
+ "loss": 1.9774,
+ "step": 1617
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001821264125145399,
+ "loss": 1.9441,
+ "step": 1618
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018210486303051195,
+ "loss": 2.0314,
+ "step": 1619
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001820833018401215,
+ "loss": 1.8234,
+ "step": 1620
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018206172894644272,
+ "loss": 1.9478,
+ "step": 1621
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018204014435255135,
+ "loss": 1.7894,
+ "step": 1622
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.000182018548061525,
+ "loss": 1.5469,
+ "step": 1623
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018199694007644277,
+ "loss": 1.9419,
+ "step": 1624
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018197532040038547,
+ "loss": 1.6686,
+ "step": 1625
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018195368903643563,
+ "loss": 2.2525,
+ "step": 1626
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018193204598767744,
+ "loss": 1.8076,
+ "step": 1627
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018191039125719662,
+ "loss": 1.976,
+ "step": 1628
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018188872484808076,
+ "loss": 1.6896,
+ "step": 1629
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018186704676341898,
+ "loss": 1.6784,
+ "step": 1630
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018184535700630213,
+ "loss": 1.9634,
+ "step": 1631
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018182365557982264,
+ "loss": 1.7406,
+ "step": 1632
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018180194248707473,
+ "loss": 1.7492,
+ "step": 1633
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018178021773115414,
+ "loss": 1.7731,
+ "step": 1634
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018175848131515837,
+ "loss": 1.6232,
+ "step": 1635
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817367332421866,
+ "loss": 1.7488,
+ "step": 1636
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817149735153396,
+ "loss": 1.3398,
+ "step": 1637
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018169320213771983,
+ "loss": 1.4521,
+ "step": 1638
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018167141911243145,
+ "loss": 1.6311,
+ "step": 1639
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018164962444258014,
+ "loss": 1.8911,
+ "step": 1640
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018162781813127346,
+ "loss": 1.9879,
+ "step": 1641
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001816060001816205,
+ "loss": 1.5637,
+ "step": 1642
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018158417059673196,
+ "loss": 1.7461,
+ "step": 1643
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815623293797203,
+ "loss": 1.6671,
+ "step": 1644
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815404765336996,
+ "loss": 1.2124,
+ "step": 1645
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815186120617856,
+ "loss": 1.6402,
+ "step": 1646
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001814967359670957,
+ "loss": 1.8837,
+ "step": 1647
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018147484825274893,
+ "loss": 1.8027,
+ "step": 1648
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018145294892186605,
+ "loss": 1.7684,
+ "step": 1649
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.0001814310379775694,
+ "loss": 1.8274,
+ "step": 1650
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.000181409115422983,
+ "loss": 1.8292,
+ "step": 1651
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018138718126123248,
+ "loss": 1.3492,
+ "step": 1652
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018136523549544523,
+ "loss": 1.509,
+ "step": 1653
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018134327812875024,
+ "loss": 1.7415,
+ "step": 1654
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018132130916427816,
+ "loss": 1.5223,
+ "step": 1655
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018129932860516126,
+ "loss": 1.9294,
+ "step": 1656
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018127733645453348,
+ "loss": 2.0716,
+ "step": 1657
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018125533271553043,
+ "loss": 1.57,
+ "step": 1658
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018123331739128938,
+ "loss": 2.2132,
+ "step": 1659
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018121129048494922,
+ "loss": 1.9006,
+ "step": 1660
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018118925199965048,
+ "loss": 1.9319,
+ "step": 1661
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018116720193853543,
+ "loss": 1.8103,
+ "step": 1662
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018114514030474787,
+ "loss": 1.7028,
+ "step": 1663
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018112306710143334,
+ "loss": 1.802,
+ "step": 1664
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.000181100982331739,
+ "loss": 1.6835,
+ "step": 1665
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.0001810788859988136,
+ "loss": 1.7223,
+ "step": 1666
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810567781058077,
+ "loss": 1.5829,
+ "step": 1667
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018103465865587333,
+ "loss": 1.9863,
+ "step": 1668
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810125276521642,
+ "loss": 1.6398,
+ "step": 1669
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018099038509783582,
+ "loss": 1.9261,
+ "step": 1670
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018096823099604517,
+ "loss": 1.8882,
+ "step": 1671
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018094606534995093,
+ "loss": 1.6716,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "eval_loss": 2.075261354446411,
+ "eval_runtime": 283.9438,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018092388816271345,
+ "loss": 1.6688,
+ "step": 1673
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018090169943749476,
+ "loss": 1.9127,
+ "step": 1674
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001808794991774584,
+ "loss": 1.7214,
+ "step": 1675
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018085728738576973,
+ "loss": 1.785,
+ "step": 1676
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018083506406559561,
+ "loss": 1.5287,
+ "step": 1677
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018081282922010464,
+ "loss": 1.9012,
+ "step": 1678
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018079058285246698,
+ "loss": 1.3094,
+ "step": 1679
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807683249658545,
+ "loss": 1.818,
+ "step": 1680
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807460555634407,
+ "loss": 1.9389,
+ "step": 1681
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807237746484007,
+ "loss": 1.4334,
+ "step": 1682
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018070148222391126,
+ "loss": 1.5422,
+ "step": 1683
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806791782931508,
+ "loss": 1.7899,
+ "step": 1684
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806568628592994,
+ "loss": 1.6106,
+ "step": 1685
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018063453592553872,
+ "loss": 1.9807,
+ "step": 1686
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806121974950521,
+ "loss": 1.1762,
+ "step": 1687
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018058984757102456,
+ "loss": 1.8338,
+ "step": 1688
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001805674861566426,
+ "loss": 1.5556,
+ "step": 1689
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805451132550946,
+ "loss": 0.87,
+ "step": 1690
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018052272886957038,
+ "loss": 1.0386,
+ "step": 1691
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805003330032615,
+ "loss": 0.8153,
+ "step": 1692
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018047792565936102,
+ "loss": 1.1745,
+ "step": 1693
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018045550684106388,
+ "loss": 1.1584,
+ "step": 1694
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018043307655156644,
+ "loss": 1.0742,
+ "step": 1695
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018041063479406675,
+ "loss": 1.0537,
+ "step": 1696
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803881815717646,
+ "loss": 1.0239,
+ "step": 1697
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803657168878612,
+ "loss": 0.9182,
+ "step": 1698
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018034324074555965,
+ "loss": 1.1856,
+ "step": 1699
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018032075314806448,
+ "loss": 1.3285,
+ "step": 1700
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018029825409858198,
+ "loss": 1.2912,
+ "step": 1701
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018027574360032,
+ "loss": 1.3666,
+ "step": 1702
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018025322165648807,
+ "loss": 0.9621,
+ "step": 1703
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018023068827029723,
+ "loss": 0.8484,
+ "step": 1704
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018020814344496037,
+ "loss": 1.2236,
+ "step": 1705
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018018558718369186,
+ "loss": 0.8155,
+ "step": 1706
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.0001801630194897077,
+ "loss": 1.2047,
+ "step": 1707
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018014044036622555,
+ "loss": 1.0269,
+ "step": 1708
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018011784981646474,
+ "loss": 1.0536,
+ "step": 1709
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018009524784364615,
+ "loss": 1.0516,
+ "step": 1710
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018007263445099235,
+ "loss": 0.9087,
+ "step": 1711
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001800500096417275,
+ "loss": 1.3057,
+ "step": 1712
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018002737341907743,
+ "loss": 0.8791,
+ "step": 1713
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018000472578626956,
+ "loss": 1.1667,
+ "step": 1714
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017998206674653294,
+ "loss": 1.1026,
+ "step": 1715
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017995939630309826,
+ "loss": 1.3228,
+ "step": 1716
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001799367144591978,
+ "loss": 0.9173,
+ "step": 1717
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017991402121806557,
+ "loss": 1.0067,
+ "step": 1718
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798913165829371,
+ "loss": 1.0256,
+ "step": 1719
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017986860055704953,
+ "loss": 0.7645,
+ "step": 1720
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798458731436417,
+ "loss": 1.0567,
+ "step": 1721
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017982313434595406,
+ "loss": 0.7465,
+ "step": 1722
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017980038416722863,
+ "loss": 1.3268,
+ "step": 1723
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017977762261070916,
+ "loss": 0.9917,
+ "step": 1724
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017975484967964087,
+ "loss": 0.8592,
+ "step": 1725
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017973206537727073,
+ "loss": 1.43,
+ "step": 1726
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017970926970684725,
+ "loss": 1.3679,
+ "step": 1727
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017968646267162063,
+ "loss": 1.2959,
+ "step": 1728
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017966364427484267,
+ "loss": 1.0674,
+ "step": 1729
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017964081451976672,
+ "loss": 1.1153,
+ "step": 1730
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017961797340964783,
+ "loss": 1.0586,
+ "step": 1731
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017959512094774266,
+ "loss": 1.2388,
+ "step": 1732
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017957225713730949,
+ "loss": 1.257,
+ "step": 1733
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.0001795493819816081,
+ "loss": 1.099,
+ "step": 1734
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001795264954839001,
+ "loss": 0.9532,
+ "step": 1735
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017950359764744859,
+ "loss": 1.2553,
+ "step": 1736
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017948068847551825,
+ "loss": 0.9973,
+ "step": 1737
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017945776797137543,
+ "loss": 1.0637,
+ "step": 1738
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017943483613828815,
+ "loss": 1.1815,
+ "step": 1739
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017941189297952597,
+ "loss": 0.8378,
+ "step": 1740
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017938893849836002,
+ "loss": 0.9375,
+ "step": 1741
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017936597269806322,
+ "loss": 0.9653,
+ "step": 1742
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001793429955819099,
+ "loss": 1.221,
+ "step": 1743
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017932000715317612,
+ "loss": 1.041,
+ "step": 1744
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017929700741513955,
+ "loss": 1.0724,
+ "step": 1745
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017927399637107945,
+ "loss": 1.1102,
+ "step": 1746
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017925097402427667,
+ "loss": 0.8542,
+ "step": 1747
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001792279403780137,
+ "loss": 1.2339,
+ "step": 1748
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017920489543557465,
+ "loss": 0.8671,
+ "step": 1749
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791818392002452,
+ "loss": 0.9779,
+ "step": 1750
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791587716753127,
+ "loss": 1.1242,
+ "step": 1751
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017913569286406603,
+ "loss": 0.9043,
+ "step": 1752
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001791126027697958,
+ "loss": 0.7996,
+ "step": 1753
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017908950139579406,
+ "loss": 0.8602,
+ "step": 1754
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017906638874535462,
+ "loss": 1.0161,
+ "step": 1755
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017904326482177284,
+ "loss": 0.8226,
+ "step": 1756
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017902012962834566,
+ "loss": 1.3885,
+ "step": 1757
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001789969831683717,
+ "loss": 1.2158,
+ "step": 1758
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017897382544515108,
+ "loss": 1.3261,
+ "step": 1759
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017895065646198567,
+ "loss": 1.2144,
+ "step": 1760
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017892747622217875,
+ "loss": 0.9881,
+ "step": 1761
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001789042847290354,
+ "loss": 1.0342,
+ "step": 1762
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017888108198586217,
+ "loss": 0.7883,
+ "step": 1763
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017885786799596732,
+ "loss": 0.9006,
+ "step": 1764
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017883464276266064,
+ "loss": 1.3695,
+ "step": 1765
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001788114062892535,
+ "loss": 1.0303,
+ "step": 1766
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017878815857905897,
+ "loss": 1.3816,
+ "step": 1767
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001787648996353916,
+ "loss": 0.8684,
+ "step": 1768
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017874162946156772,
+ "loss": 1.1157,
+ "step": 1769
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017871834806090501,
+ "loss": 1.0087,
+ "step": 1770
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.000178695055436723,
+ "loss": 0.7173,
+ "step": 1771
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017867175159234265,
+ "loss": 1.4784,
+ "step": 1772
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017864843653108662,
+ "loss": 1.1401,
+ "step": 1773
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786251102562791,
+ "loss": 1.0952,
+ "step": 1774
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786017727712459,
+ "loss": 0.9443,
+ "step": 1775
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017857842407931445,
+ "loss": 1.0682,
+ "step": 1776
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785550641838138,
+ "loss": 0.9402,
+ "step": 1777
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017853169308807448,
+ "loss": 1.0576,
+ "step": 1778
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785083107954288,
+ "loss": 1.1425,
+ "step": 1779
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017848491730921046,
+ "loss": 1.1402,
+ "step": 1780
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017846151263275494,
+ "loss": 1.4482,
+ "step": 1781
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017843809676939922,
+ "loss": 0.7765,
+ "step": 1782
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017841466972248188,
+ "loss": 1.1478,
+ "step": 1783
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001783912314953431,
+ "loss": 1.1876,
+ "step": 1784
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017836778209132464,
+ "loss": 1.2036,
+ "step": 1785
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783443215137699,
+ "loss": 1.0297,
+ "step": 1786
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783208497660239,
+ "loss": 0.8186,
+ "step": 1787
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017829736685143308,
+ "loss": 0.7258,
+ "step": 1788
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017827387277334568,
+ "loss": 0.8072,
+ "step": 1789
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017825036753511144,
+ "loss": 1.0474,
+ "step": 1790
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017822685114008167,
+ "loss": 1.2141,
+ "step": 1791
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017820332359160928,
+ "loss": 1.1443,
+ "step": 1792
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001781797848930488,
+ "loss": 0.9864,
+ "step": 1793
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017815623504775636,
+ "loss": 1.2998,
+ "step": 1794
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001781326740590896,
+ "loss": 1.0672,
+ "step": 1795
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017810910193040785,
+ "loss": 0.9152,
+ "step": 1796
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.000178085518665072,
+ "loss": 1.2555,
+ "step": 1797
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017806192426644444,
+ "loss": 1.2085,
+ "step": 1798
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017803831873788926,
+ "loss": 1.6205,
+ "step": 1799
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001780147020827721,
+ "loss": 1.3382,
+ "step": 1800
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017799107430446016,
+ "loss": 1.3309,
+ "step": 1801
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017796743540632223,
+ "loss": 1.2556,
+ "step": 1802
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017794378539172877,
+ "loss": 0.829,
+ "step": 1803
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017792012426405166,
+ "loss": 1.1711,
+ "step": 1804
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017789645202666456,
+ "loss": 1.0128,
+ "step": 1805
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017787276868294253,
+ "loss": 1.2074,
+ "step": 1806
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017784907423626237,
+ "loss": 1.0996,
+ "step": 1807
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778253686900023,
+ "loss": 0.9608,
+ "step": 1808
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778016520475423,
+ "loss": 0.827,
+ "step": 1809
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017777792431226383,
+ "loss": 1.2365,
+ "step": 1810
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017775418548754993,
+ "loss": 1.0276,
+ "step": 1811
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777304355767852,
+ "loss": 0.8178,
+ "step": 1812
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777066745833559,
+ "loss": 1.1297,
+ "step": 1813
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017768290251064987,
+ "loss": 1.1737,
+ "step": 1814
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017765911936205644,
+ "loss": 1.1606,
+ "step": 1815
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017763532514096658,
+ "loss": 1.2605,
+ "step": 1816
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001776115198507728,
+ "loss": 1.2271,
+ "step": 1817
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017758770349486923,
+ "loss": 0.9407,
+ "step": 1818
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001775638760766516,
+ "loss": 1.0273,
+ "step": 1819
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017754003759951715,
+ "loss": 1.0746,
+ "step": 1820
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017751618806686472,
+ "loss": 1.0091,
+ "step": 1821
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017749232748209473,
+ "loss": 0.997,
+ "step": 1822
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001774684558486092,
+ "loss": 1.4814,
+ "step": 1823
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017744457316981168,
+ "loss": 1.1407,
+ "step": 1824
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017742067944910737,
+ "loss": 0.9824,
+ "step": 1825
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017739677468990293,
+ "loss": 1.2603,
+ "step": 1826
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017737285889560668,
+ "loss": 1.3721,
+ "step": 1827
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017734893206962853,
+ "loss": 1.1186,
+ "step": 1828
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017732499421537984,
+ "loss": 0.7693,
+ "step": 1829
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001773010453362737,
+ "loss": 1.0449,
+ "step": 1830
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017727708543572467,
+ "loss": 0.9331,
+ "step": 1831
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001772531145171489,
+ "loss": 0.739,
+ "step": 1832
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017722913258396417,
+ "loss": 0.9076,
+ "step": 1833
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017720513963958968,
+ "loss": 1.3464,
+ "step": 1834
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017718113568744638,
+ "loss": 0.8858,
+ "step": 1835
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017715712073095672,
+ "loss": 1.3204,
+ "step": 1836
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017713309477354467,
+ "loss": 1.0538,
+ "step": 1837
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001771090578186358,
+ "loss": 1.44,
+ "step": 1838
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770850098696573,
+ "loss": 1.0167,
+ "step": 1839
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017706095093003785,
+ "loss": 0.9724,
+ "step": 1840
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017703688100320774,
+ "loss": 0.8055,
+ "step": 1841
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770128000925988,
+ "loss": 0.7363,
+ "step": 1842
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017698870820164446,
+ "loss": 1.1329,
+ "step": 1843
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017696460533377968,
+ "loss": 0.9487,
+ "step": 1844
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017694049149244104,
+ "loss": 1.2571,
+ "step": 1845
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001769163666810666,
+ "loss": 0.9148,
+ "step": 1846
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017689223090309607,
+ "loss": 1.4676,
+ "step": 1847
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017686808416197072,
+ "loss": 0.9395,
+ "step": 1848
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017684392646113325,
+ "loss": 0.9632,
+ "step": 1849
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017681975780402807,
+ "loss": 1.0037,
+ "step": 1850
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001767955781941011,
+ "loss": 0.9557,
+ "step": 1851
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017677138763479985,
+ "loss": 1.2799,
+ "step": 1852
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017674718612957336,
+ "loss": 0.8461,
+ "step": 1853
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001767229736818722,
+ "loss": 1.2762,
+ "step": 1854
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017669875029514856,
+ "loss": 1.4801,
+ "step": 1855
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017667451597285617,
+ "loss": 0.9849,
+ "step": 1856
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766502707184503,
+ "loss": 1.0875,
+ "step": 1857
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017662601453538783,
+ "loss": 0.8346,
+ "step": 1858
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766017474271271,
+ "loss": 1.1933,
+ "step": 1859
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017657746939712815,
+ "loss": 0.8789,
+ "step": 1860
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017655318044885245,
+ "loss": 1.0091,
+ "step": 1861
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0001765288805857631,
+ "loss": 0.7371,
+ "step": 1862
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017650456981132466,
+ "loss": 0.8131,
+ "step": 1863
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017648024812900342,
+ "loss": 1.0795,
+ "step": 1864
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.000176455915542267,
+ "loss": 0.9882,
+ "step": 1865
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017643157205458483,
+ "loss": 1.212,
+ "step": 1866
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017640721766942768,
+ "loss": 1.4755,
+ "step": 1867
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017638285239026798,
+ "loss": 1.0391,
+ "step": 1868
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017635847622057965,
+ "loss": 1.2568,
+ "step": 1869
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017633408916383826,
+ "loss": 1.2138,
+ "step": 1870
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001763096912235208,
+ "loss": 1.196,
+ "step": 1871
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017628528240310596,
+ "loss": 1.1476,
+ "step": 1872
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017626086270607384,
+ "loss": 1.1421,
+ "step": 1873
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017623643213590619,
+ "loss": 1.0711,
+ "step": 1874
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001762119906960863,
+ "loss": 0.8842,
+ "step": 1875
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017618753839009893,
+ "loss": 0.798,
+ "step": 1876
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761630752214305,
+ "loss": 0.8591,
+ "step": 1877
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017613860119356883,
+ "loss": 0.7646,
+ "step": 1878
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761141163100035,
+ "loss": 1.4113,
+ "step": 1879
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017608962057422549,
+ "loss": 0.8605,
+ "step": 1880
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017606511398972731,
+ "loss": 0.6179,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "eval_loss": 2.3971996307373047,
+ "eval_runtime": 283.7444,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760405965600031,
+ "loss": 0.8651,
+ "step": 1882
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760160682885485,
+ "loss": 1.3178,
+ "step": 1883
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017599152917886071,
+ "loss": 0.9233,
+ "step": 1884
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017596697923443847,
+ "loss": 0.9126,
+ "step": 1885
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001759424184587821,
+ "loss": 0.9749,
+ "step": 1886
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017591784685539334,
+ "loss": 1.1929,
+ "step": 1887
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017589326442777565,
+ "loss": 1.2026,
+ "step": 1888
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017586867117943392,
+ "loss": 1.1162,
+ "step": 1889
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017584406711387463,
+ "loss": 0.9818,
+ "step": 1890
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001758194522346057,
+ "loss": 0.9802,
+ "step": 1891
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001757948265451368,
+ "loss": 0.8963,
+ "step": 1892
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017577019004897897,
+ "loss": 1.0359,
+ "step": 1893
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017574554274964478,
+ "loss": 1.0788,
+ "step": 1894
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017572088465064848,
+ "loss": 0.9415,
+ "step": 1895
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756962157555057,
+ "loss": 1.0944,
+ "step": 1896
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017567153606773373,
+ "loss": 1.357,
+ "step": 1897
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017564684559085136,
+ "loss": 1.0108,
+ "step": 1898
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756221443283789,
+ "loss": 0.5337,
+ "step": 1899
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755974322838382,
+ "loss": 1.4234,
+ "step": 1900
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755727094607527,
+ "loss": 0.9083,
+ "step": 1901
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017554797586264727,
+ "loss": 0.9199,
+ "step": 1902
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017552323149304844,
+ "loss": 1.1885,
+ "step": 1903
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754984763554842,
+ "loss": 1.276,
+ "step": 1904
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754737104534841,
+ "loss": 0.8882,
+ "step": 1905
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017544893379057918,
+ "loss": 0.993,
+ "step": 1906
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754241463703021,
+ "loss": 1.261,
+ "step": 1907
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017539934819618696,
+ "loss": 0.9877,
+ "step": 1908
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017537453927176947,
+ "loss": 0.9991,
+ "step": 1909
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017534971960058685,
+ "loss": 1.2012,
+ "step": 1910
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001753248891861778,
+ "loss": 0.864,
+ "step": 1911
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017530004803208263,
+ "loss": 1.0382,
+ "step": 1912
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017527519614184316,
+ "loss": 1.068,
+ "step": 1913
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017525033351900268,
+ "loss": 0.8687,
+ "step": 1914
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752254601671061,
+ "loss": 1.1174,
+ "step": 1915
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752005760896998,
+ "loss": 1.269,
+ "step": 1916
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751756812903317,
+ "loss": 0.7387,
+ "step": 1917
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751507757725513,
+ "loss": 0.8484,
+ "step": 1918
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001751258595399095,
+ "loss": 1.0092,
+ "step": 1919
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017510093259595885,
+ "loss": 1.0145,
+ "step": 1920
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017507599494425344,
+ "loss": 1.2969,
+ "step": 1921
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017505104658834875,
+ "loss": 0.7925,
+ "step": 1922
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017502608753180196,
+ "loss": 0.8974,
+ "step": 1923
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017500111777817164,
+ "loss": 0.764,
+ "step": 1924
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001749761373310179,
+ "loss": 1.1057,
+ "step": 1925
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017495114619390246,
+ "loss": 0.8092,
+ "step": 1926
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017492614437038845,
+ "loss": 0.9553,
+ "step": 1927
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017490113186404067,
+ "loss": 1.0278,
+ "step": 1928
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748761086784253,
+ "loss": 1.2152,
+ "step": 1929
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017485107481711012,
+ "loss": 1.5154,
+ "step": 1930
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748260302836644,
+ "loss": 1.1973,
+ "step": 1931
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017480097508165896,
+ "loss": 0.9429,
+ "step": 1932
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747759092146661,
+ "loss": 1.5453,
+ "step": 1933
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747508326862597,
+ "loss": 1.1691,
+ "step": 1934
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017472574550001508,
+ "loss": 1.2094,
+ "step": 1935
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017470064765950918,
+ "loss": 1.0777,
+ "step": 1936
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017467553916832035,
+ "loss": 1.0883,
+ "step": 1937
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017465042003002857,
+ "loss": 0.9297,
+ "step": 1938
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017462529024821522,
+ "loss": 0.7814,
+ "step": 1939
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017460014982646334,
+ "loss": 1.3645,
+ "step": 1940
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.0001745749987683573,
+ "loss": 1.0604,
+ "step": 1941
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017454983707748317,
+ "loss": 0.9416,
+ "step": 1942
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017452466475742845,
+ "loss": 1.4187,
+ "step": 1943
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017449948181178215,
+ "loss": 1.1619,
+ "step": 1944
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017447428824413482,
+ "loss": 1.1381,
+ "step": 1945
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017444908405807845,
+ "loss": 1.2304,
+ "step": 1946
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.0001744238692572067,
+ "loss": 1.2149,
+ "step": 1947
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017439864384511463,
+ "loss": 0.8172,
+ "step": 1948
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017437340782539877,
+ "loss": 1.0783,
+ "step": 1949
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017434816120165728,
+ "loss": 1.0661,
+ "step": 1950
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017432290397748982,
+ "loss": 1.1959,
+ "step": 1951
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001742976361564974,
+ "loss": 1.0581,
+ "step": 1952
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017427235774228274,
+ "loss": 0.8948,
+ "step": 1953
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017424706873845,
+ "loss": 1.2565,
+ "step": 1954
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017422176914860476,
+ "loss": 0.9237,
+ "step": 1955
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017419645897635432,
+ "loss": 1.219,
+ "step": 1956
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017417113822530727,
+ "loss": 1.4606,
+ "step": 1957
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017414580689907377,
+ "loss": 0.714,
+ "step": 1958
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001741204650012656,
+ "loss": 1.2223,
+ "step": 1959
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017409511253549593,
+ "loss": 0.9828,
+ "step": 1960
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017406974950537942,
+ "loss": 0.9954,
+ "step": 1961
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017404437591453235,
+ "loss": 1.0307,
+ "step": 1962
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001740189917665724,
+ "loss": 0.9331,
+ "step": 1963
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739935970651188,
+ "loss": 1.3517,
+ "step": 1964
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017396819181379232,
+ "loss": 1.2024,
+ "step": 1965
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739427760162151,
+ "loss": 0.9696,
+ "step": 1966
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017391734967601102,
+ "loss": 1.1559,
+ "step": 1967
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001738919127968052,
+ "loss": 1.3104,
+ "step": 1968
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017386646538222443,
+ "loss": 0.9073,
+ "step": 1969
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017384100743589697,
+ "loss": 1.0539,
+ "step": 1970
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017381553896145255,
+ "loss": 0.9873,
+ "step": 1971
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737900599625224,
+ "loss": 0.9466,
+ "step": 1972
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737645704427393,
+ "loss": 1.0639,
+ "step": 1973
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737390704057375,
+ "loss": 0.5843,
+ "step": 1974
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017371355985515275,
+ "loss": 1.1318,
+ "step": 1975
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017368803879462227,
+ "loss": 1.0116,
+ "step": 1976
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001736625072277848,
+ "loss": 0.8845,
+ "step": 1977
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017363696515828062,
+ "loss": 0.8081,
+ "step": 1978
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017361141258975148,
+ "loss": 0.8795,
+ "step": 1979
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735858495258406,
+ "loss": 0.9725,
+ "step": 1980
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735602759701927,
+ "loss": 1.0164,
+ "step": 1981
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017353469192645405,
+ "loss": 1.2937,
+ "step": 1982
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735090973982723,
+ "loss": 1.0842,
+ "step": 1983
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017348349238929678,
+ "loss": 1.0043,
+ "step": 1984
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017345787690317815,
+ "loss": 1.1302,
+ "step": 1985
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017343225094356855,
+ "loss": 1.195,
+ "step": 1986
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017340661451412183,
+ "loss": 1.1449,
+ "step": 1987
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017338096761849309,
+ "loss": 1.2244,
+ "step": 1988
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017335531026033897,
+ "loss": 0.9273,
+ "step": 1989
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017332964244331776,
+ "loss": 1.0448,
+ "step": 1990
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017330396417108908,
+ "loss": 1.0074,
+ "step": 1991
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017327827544731412,
+ "loss": 0.9284,
+ "step": 1992
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.0001732525762756555,
+ "loss": 1.0307,
+ "step": 1993
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017322686665977737,
+ "loss": 1.1526,
+ "step": 1994
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017320114660334535,
+ "loss": 0.819,
+ "step": 1995
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017317541611002656,
+ "loss": 1.1029,
+ "step": 1996
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017314967518348962,
+ "loss": 1.2471,
+ "step": 1997
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017312392382740462,
+ "loss": 1.0156,
+ "step": 1998
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017309816204544317,
+ "loss": 1.1843,
+ "step": 1999
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017307238984127832,
+ "loss": 1.1588,
+ "step": 2000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017304660721858457,
+ "loss": 1.0157,
+ "step": 2001
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.000173020814181038,
+ "loss": 1.0563,
+ "step": 2002
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017299501073231622,
+ "loss": 1.1883,
+ "step": 2003
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017296919687609808,
+ "loss": 0.9404,
+ "step": 2004
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017294337261606417,
+ "loss": 1.2495,
+ "step": 2005
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017291753795589643,
+ "loss": 1.0074,
+ "step": 2006
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017289169289927837,
+ "loss": 1.1411,
+ "step": 2007
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017286583744989488,
+ "loss": 0.9942,
+ "step": 2008
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017283997161143239,
+ "loss": 0.952,
+ "step": 2009
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017281409538757883,
+ "loss": 1.2966,
+ "step": 2010
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017278820878202357,
+ "loss": 1.0836,
+ "step": 2011
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727623117984575,
+ "loss": 1.0984,
+ "step": 2012
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727364044405729,
+ "loss": 0.8822,
+ "step": 2013
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017271048671206366,
+ "loss": 1.2014,
+ "step": 2014
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017268455861662503,
+ "loss": 1.1779,
+ "step": 2015
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017265862015795384,
+ "loss": 0.9966,
+ "step": 2016
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017263267133974832,
+ "loss": 0.9536,
+ "step": 2017
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017260671216570822,
+ "loss": 0.811,
+ "step": 2018
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017258074263953472,
+ "loss": 0.8241,
+ "step": 2019
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017255476276493056,
+ "loss": 1.1263,
+ "step": 2020
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017252877254559986,
+ "loss": 0.995,
+ "step": 2021
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001725027719852483,
+ "loss": 1.1481,
+ "step": 2022
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001724767610875829,
+ "loss": 1.129,
+ "step": 2023
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017245073985631238,
+ "loss": 0.5928,
+ "step": 2024
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017242470829514672,
+ "loss": 0.8326,
+ "step": 2025
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017239866640779745,
+ "loss": 1.1092,
+ "step": 2026
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017237261419797756,
+ "loss": 1.5015,
+ "step": 2027
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001723465516694016,
+ "loss": 0.9775,
+ "step": 2028
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017232047882578548,
+ "loss": 0.9348,
+ "step": 2029
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001722943956708466,
+ "loss": 0.6199,
+ "step": 2030
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017226830220830384,
+ "loss": 1.1485,
+ "step": 2031
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017224219844187764,
+ "loss": 1.1195,
+ "step": 2032
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017221608437528973,
+ "loss": 1.0528,
+ "step": 2033
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017218996001226345,
+ "loss": 1.1058,
+ "step": 2034
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017216382535652355,
+ "loss": 1.1451,
+ "step": 2035
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001721376804117963,
+ "loss": 1.2251,
+ "step": 2036
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017211152518180936,
+ "loss": 1.0708,
+ "step": 2037
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017208535967029188,
+ "loss": 1.0746,
+ "step": 2038
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017205918388097456,
+ "loss": 1.3262,
+ "step": 2039
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017203299781758943,
+ "loss": 0.7619,
+ "step": 2040
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017200680148387007,
+ "loss": 1.01,
+ "step": 2041
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001719805948835515,
+ "loss": 1.1651,
+ "step": 2042
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017195437802037026,
+ "loss": 1.4671,
+ "step": 2043
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017192815089806424,
+ "loss": 0.9857,
+ "step": 2044
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001719019135203729,
+ "loss": 1.2613,
+ "step": 2045
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017187566589103704,
+ "loss": 1.4386,
+ "step": 2046
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001718494080137991,
+ "loss": 1.0965,
+ "step": 2047
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017182313989240285,
+ "loss": 0.752,
+ "step": 2048
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017179686153059352,
+ "loss": 0.9126,
+ "step": 2049
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017177057293211784,
+ "loss": 1.5075,
+ "step": 2050
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.000171744274100724,
+ "loss": 1.0407,
+ "step": 2051
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017171796504016166,
+ "loss": 0.8263,
+ "step": 2052
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.0001716916457541819,
+ "loss": 0.9453,
+ "step": 2053
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017166531624653722,
+ "loss": 0.9777,
+ "step": 2054
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017163897652098172,
+ "loss": 1.2129,
+ "step": 2055
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017161262658127086,
+ "loss": 1.3642,
+ "step": 2056
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017158626643116152,
+ "loss": 0.6798,
+ "step": 2057
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017155989607441213,
+ "loss": 0.874,
+ "step": 2058
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017153351551478247,
+ "loss": 1.0636,
+ "step": 2059
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001715071247560339,
+ "loss": 1.0563,
+ "step": 2060
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714807238019291,
+ "loss": 1.1984,
+ "step": 2061
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017145431265623234,
+ "loss": 0.9444,
+ "step": 2062
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714278913227092,
+ "loss": 0.7809,
+ "step": 2063
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017140145980512684,
+ "loss": 1.649,
+ "step": 2064
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713750181072538,
+ "loss": 1.0956,
+ "step": 2065
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713485662328601,
+ "loss": 1.2845,
+ "step": 2066
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017132210418571714,
+ "loss": 1.0484,
+ "step": 2067
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017129563196959793,
+ "loss": 1.0291,
+ "step": 2068
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017126914958827679,
+ "loss": 1.1226,
+ "step": 2069
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.0001712426570455295,
+ "loss": 1.0119,
+ "step": 2070
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017121615434513332,
+ "loss": 1.1663,
+ "step": 2071
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.000171189641490867,
+ "loss": 1.1353,
+ "step": 2072
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017116311848651064,
+ "loss": 1.0761,
+ "step": 2073
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017113658533584594,
+ "loss": 1.1978,
+ "step": 2074
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017111004204265582,
+ "loss": 1.3881,
+ "step": 2075
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017108348861072484,
+ "loss": 1.3945,
+ "step": 2076
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017105692504383897,
+ "loss": 1.3796,
+ "step": 2077
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017103035134578555,
+ "loss": 1.1721,
+ "step": 2078
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001710037675203534,
+ "loss": 1.0061,
+ "step": 2079
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017097717357133284,
+ "loss": 1.2456,
+ "step": 2080
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017095056950251555,
+ "loss": 0.788,
+ "step": 2081
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001709239553176947,
+ "loss": 1.16,
+ "step": 2082
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001708973310206649,
+ "loss": 1.0498,
+ "step": 2083
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017087069661522218,
+ "loss": 0.8993,
+ "step": 2084
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017084405210516406,
+ "loss": 1.2088,
+ "step": 2085
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001708173974942894,
+ "loss": 1.0897,
+ "step": 2086
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017079073278639863,
+ "loss": 1.2718,
+ "step": 2087
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017076405798529355,
+ "loss": 1.2325,
+ "step": 2088
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017073737309477736,
+ "loss": 1.0555,
+ "step": 2089
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017071067811865476,
+ "loss": 1.1428,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 2.3191208839416504,
+ "eval_runtime": 284.1375,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706839730607319,
+ "loss": 1.0908,
+ "step": 2091
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706572579248163,
+ "loss": 1.2092,
+ "step": 2092
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.000170630532714717,
+ "loss": 1.1735,
+ "step": 2093
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001706037974342444,
+ "loss": 1.2716,
+ "step": 2094
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017057705208721035,
+ "loss": 1.0095,
+ "step": 2095
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001705502966774282,
+ "loss": 1.3059,
+ "step": 2096
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017052353120871266,
+ "loss": 0.8269,
+ "step": 2097
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001704967556848799,
+ "loss": 1.0615,
+ "step": 2098
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017046997010974755,
+ "loss": 1.2709,
+ "step": 2099
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017044317448713461,
+ "loss": 1.1633,
+ "step": 2100
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017041636882086158,
+ "loss": 0.9273,
+ "step": 2101
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017038955311475038,
+ "loss": 1.3117,
+ "step": 2102
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001703627273726243,
+ "loss": 0.8883,
+ "step": 2103
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017033589159830815,
+ "loss": 1.1371,
+ "step": 2104
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017030904579562806,
+ "loss": 1.5402,
+ "step": 2105
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017028218996841172,
+ "loss": 0.9156,
+ "step": 2106
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017025532412048817,
+ "loss": 1.0962,
+ "step": 2107
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001702284482556879,
+ "loss": 0.9402,
+ "step": 2108
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017020156237784279,
+ "loss": 0.8146,
+ "step": 2109
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001701746664907862,
+ "loss": 1.1718,
+ "step": 2110
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017014776059835288,
+ "loss": 1.0618,
+ "step": 2111
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017012084470437907,
+ "loss": 1.4796,
+ "step": 2112
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017009391881270237,
+ "loss": 0.8402,
+ "step": 2113
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017006698292716178,
+ "loss": 1.1641,
+ "step": 2114
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001700400370515978,
+ "loss": 1.241,
+ "step": 2115
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017001308118985237,
+ "loss": 0.8683,
+ "step": 2116
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00016998611534576873,
+ "loss": 1.2697,
+ "step": 2117
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016995913952319168,
+ "loss": 0.9233,
+ "step": 2118
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016993215372596737,
+ "loss": 1.2472,
+ "step": 2119
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016990515795794334,
+ "loss": 1.2541,
+ "step": 2120
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016987815222296865,
+ "loss": 1.0016,
+ "step": 2121
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016985113652489374,
+ "loss": 1.0678,
+ "step": 2122
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016982411086757037,
+ "loss": 1.6066,
+ "step": 2123
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016979707525485192,
+ "loss": 1.229,
+ "step": 2124
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016977002969059302,
+ "loss": 0.752,
+ "step": 2125
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016974297417864977,
+ "loss": 0.8752,
+ "step": 2126
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001697159087228797,
+ "loss": 0.8896,
+ "step": 2127
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016968883332714186,
+ "loss": 0.9657,
+ "step": 2128
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696617479952964,
+ "loss": 1.3657,
+ "step": 2129
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696346527312053,
+ "loss": 0.9876,
+ "step": 2130
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016960754753873162,
+ "loss": 1.0165,
+ "step": 2131
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016958043242174003,
+ "loss": 1.625,
+ "step": 2132
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016955330738409655,
+ "loss": 1.5502,
+ "step": 2133
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016952617242966864,
+ "loss": 1.0793,
+ "step": 2134
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016949902756232507,
+ "loss": 1.4425,
+ "step": 2135
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016947187278593622,
+ "loss": 1.3124,
+ "step": 2136
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016944470810437365,
+ "loss": 0.927,
+ "step": 2137
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016941753352151055,
+ "loss": 1.1911,
+ "step": 2138
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016939034904122138,
+ "loss": 1.0768,
+ "step": 2139
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016936315466738205,
+ "loss": 1.1277,
+ "step": 2140
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016933595040386984,
+ "loss": 0.812,
+ "step": 2141
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.0001693087362545636,
+ "loss": 0.8299,
+ "step": 2142
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016928151222334338,
+ "loss": 1.1125,
+ "step": 2143
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016925427831409077,
+ "loss": 1.1835,
+ "step": 2144
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016922703453068873,
+ "loss": 1.2007,
+ "step": 2145
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016919978087702163,
+ "loss": 0.8524,
+ "step": 2146
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016917251735697523,
+ "loss": 0.9497,
+ "step": 2147
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016914524397443673,
+ "loss": 1.1004,
+ "step": 2148
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016911796073329466,
+ "loss": 0.8347,
+ "step": 2149
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016909066763743912,
+ "loss": 0.9492,
+ "step": 2150
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016906336469076148,
+ "loss": 1.1406,
+ "step": 2151
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016903605189715447,
+ "loss": 1.0137,
+ "step": 2152
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001690087292605124,
+ "loss": 1.0624,
+ "step": 2153
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016898139678473076,
+ "loss": 1.1767,
+ "step": 2154
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001689540544737067,
+ "loss": 1.4184,
+ "step": 2155
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016892670233133856,
+ "loss": 0.957,
+ "step": 2156
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016889934036152618,
+ "loss": 1.0399,
+ "step": 2157
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016887196856817073,
+ "loss": 1.2009,
+ "step": 2158
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016884458695517495,
+ "loss": 1.3977,
+ "step": 2159
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016881719552644273,
+ "loss": 1.1328,
+ "step": 2160
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016878979428587955,
+ "loss": 1.5007,
+ "step": 2161
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016876238323739221,
+ "loss": 1.1248,
+ "step": 2162
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016873496238488899,
+ "loss": 1.0358,
+ "step": 2163
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016870753173227945,
+ "loss": 1.2961,
+ "step": 2164
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016868009128347459,
+ "loss": 0.9435,
+ "step": 2165
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016865264104238683,
+ "loss": 0.9642,
+ "step": 2166
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016862518101293,
+ "loss": 1.0169,
+ "step": 2167
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016859771119901929,
+ "loss": 1.0904,
+ "step": 2168
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.0001685702316045713,
+ "loss": 1.3178,
+ "step": 2169
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016854274223350397,
+ "loss": 1.1395,
+ "step": 2170
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016851524308973678,
+ "loss": 1.1207,
+ "step": 2171
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016848773417719044,
+ "loss": 1.3544,
+ "step": 2172
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016846021549978715,
+ "loss": 1.3503,
+ "step": 2173
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016843268706145042,
+ "loss": 1.4276,
+ "step": 2174
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016840514886610529,
+ "loss": 0.9888,
+ "step": 2175
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016837760091767802,
+ "loss": 1.0913,
+ "step": 2176
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001683500432200964,
+ "loss": 1.4781,
+ "step": 2177
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016832247577728955,
+ "loss": 1.2657,
+ "step": 2178
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.000168294898593188,
+ "loss": 0.9206,
+ "step": 2179
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682673116717236,
+ "loss": 0.9218,
+ "step": 2180
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682397150168297,
+ "loss": 1.2719,
+ "step": 2181
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016821210863244096,
+ "loss": 0.984,
+ "step": 2182
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016818449252249345,
+ "loss": 1.4641,
+ "step": 2183
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001681568666909246,
+ "loss": 1.2571,
+ "step": 2184
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016812923114167328,
+ "loss": 1.2025,
+ "step": 2185
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016810158587867973,
+ "loss": 0.9621,
+ "step": 2186
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016807393090588553,
+ "loss": 1.0016,
+ "step": 2187
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016804626622723368,
+ "loss": 1.031,
+ "step": 2188
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016801859184666857,
+ "loss": 0.7573,
+ "step": 2189
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016799090776813597,
+ "loss": 1.2694,
+ "step": 2190
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.000167963213995583,
+ "loss": 1.196,
+ "step": 2191
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016793551053295822,
+ "loss": 0.8754,
+ "step": 2192
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016790779738421152,
+ "loss": 1.1743,
+ "step": 2193
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678800745532942,
+ "loss": 1.0921,
+ "step": 2194
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016785234204415888,
+ "loss": 0.8778,
+ "step": 2195
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678245998607597,
+ "loss": 1.0528,
+ "step": 2196
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016779684800705203,
+ "loss": 1.0255,
+ "step": 2197
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001677690864869927,
+ "loss": 0.6344,
+ "step": 2198
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016774131530453992,
+ "loss": 0.8691,
+ "step": 2199
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016771353446365318,
+ "loss": 1.2061,
+ "step": 2200
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001676857439682935,
+ "loss": 1.1759,
+ "step": 2201
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016765794382242314,
+ "loss": 1.1118,
+ "step": 2202
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016763013403000584,
+ "loss": 1.3005,
+ "step": 2203
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016760231459500666,
+ "loss": 1.0415,
+ "step": 2204
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.000167574485521392,
+ "loss": 0.824,
+ "step": 2205
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016754664681312975,
+ "loss": 0.6682,
+ "step": 2206
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016751879847418905,
+ "loss": 1.9204,
+ "step": 2207
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016749094050854047,
+ "loss": 0.9931,
+ "step": 2208
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016746307292015602,
+ "loss": 0.8898,
+ "step": 2209
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016743519571300888,
+ "loss": 1.3337,
+ "step": 2210
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016740730889107383,
+ "loss": 1.2947,
+ "step": 2211
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673794124583269,
+ "loss": 1.1882,
+ "step": 2212
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673515064187455,
+ "loss": 1.5408,
+ "step": 2213
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016732359077630847,
+ "loss": 1.1273,
+ "step": 2214
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001672956655349959,
+ "loss": 0.8954,
+ "step": 2215
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016726773069878934,
+ "loss": 1.1747,
+ "step": 2216
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016723978627167173,
+ "loss": 0.807,
+ "step": 2217
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016721183225762727,
+ "loss": 1.2512,
+ "step": 2218
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016718386866064166,
+ "loss": 1.0796,
+ "step": 2219
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016715589548470185,
+ "loss": 1.0905,
+ "step": 2220
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016712791273379622,
+ "loss": 1.3779,
+ "step": 2221
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016709992041191452,
+ "loss": 1.2015,
+ "step": 2222
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016707191852304782,
+ "loss": 0.8612,
+ "step": 2223
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001670439070711886,
+ "loss": 1.1819,
+ "step": 2224
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016701588606033064,
+ "loss": 1.2715,
+ "step": 2225
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001669878554944692,
+ "loss": 1.3681,
+ "step": 2226
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016695981537760072,
+ "loss": 1.1254,
+ "step": 2227
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669317657137232,
+ "loss": 0.9476,
+ "step": 2228
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669037065068359,
+ "loss": 1.235,
+ "step": 2229
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016687563776093941,
+ "loss": 0.7356,
+ "step": 2230
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016684755948003573,
+ "loss": 0.7901,
+ "step": 2231
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016681947166812824,
+ "loss": 1.317,
+ "step": 2232
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016679137432922163,
+ "loss": 0.8832,
+ "step": 2233
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016676326746732195,
+ "loss": 1.2776,
+ "step": 2234
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016673515108643665,
+ "loss": 1.0435,
+ "step": 2235
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001667070251905745,
+ "loss": 1.0957,
+ "step": 2236
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016667888978374567,
+ "loss": 1.0862,
+ "step": 2237
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016665074486996165,
+ "loss": 1.1112,
+ "step": 2238
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001666225904532352,
+ "loss": 1.3633,
+ "step": 2239
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016659442653758064,
+ "loss": 1.444,
+ "step": 2240
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016656625312701348,
+ "loss": 0.8248,
+ "step": 2241
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016653807022555067,
+ "loss": 1.2522,
+ "step": 2242
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001665098778372104,
+ "loss": 1.2107,
+ "step": 2243
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001664816759660124,
+ "loss": 1.0813,
+ "step": 2244
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016645346461597753,
+ "loss": 1.1136,
+ "step": 2245
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016642524379112817,
+ "loss": 1.1003,
+ "step": 2246
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.000166397013495488,
+ "loss": 1.0635,
+ "step": 2247
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016636877373308204,
+ "loss": 1.0575,
+ "step": 2248
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016634052450793663,
+ "loss": 0.7693,
+ "step": 2249
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016631226582407952,
+ "loss": 1.5965,
+ "step": 2250
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001662839976855398,
+ "loss": 1.0989,
+ "step": 2251
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016625572009634787,
+ "loss": 0.9198,
+ "step": 2252
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016622743306053548,
+ "loss": 1.0896,
+ "step": 2253
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016619913658213578,
+ "loss": 1.015,
+ "step": 2254
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.0001661708306651832,
+ "loss": 0.8572,
+ "step": 2255
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016614251531371353,
+ "loss": 1.1508,
+ "step": 2256
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.000166114190531764,
+ "loss": 1.1852,
+ "step": 2257
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016608585632337306,
+ "loss": 0.932,
+ "step": 2258
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016605751269258053,
+ "loss": 1.2542,
+ "step": 2259
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016602915964342757,
+ "loss": 0.943,
+ "step": 2260
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016600079717995678,
+ "loss": 1.2438,
+ "step": 2261
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016597242530621203,
+ "loss": 0.9928,
+ "step": 2262
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016594404402623845,
+ "loss": 0.9516,
+ "step": 2263
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016591565334408265,
+ "loss": 1.1689,
+ "step": 2264
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.0001658872532637925,
+ "loss": 1.3155,
+ "step": 2265
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016585884378941725,
+ "loss": 1.1596,
+ "step": 2266
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016583042492500746,
+ "loss": 0.9956,
+ "step": 2267
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016580199667461508,
+ "loss": 0.9289,
+ "step": 2268
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016577355904229325,
+ "loss": 1.3225,
+ "step": 2269
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016574511203209667,
+ "loss": 1.0384,
+ "step": 2270
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001657166556480812,
+ "loss": 0.697,
+ "step": 2271
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016568818989430416,
+ "loss": 0.7702,
+ "step": 2272
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016565971477482404,
+ "loss": 1.1041,
+ "step": 2273
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016563123029370093,
+ "loss": 1.0462,
+ "step": 2274
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001656027364549959,
+ "loss": 1.0797,
+ "step": 2275
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001655742332627717,
+ "loss": 1.3301,
+ "step": 2276
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.0001655457207210922,
+ "loss": 1.0467,
+ "step": 2277
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016551719883402271,
+ "loss": 0.9432,
+ "step": 2278
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016548866760562978,
+ "loss": 1.1808,
+ "step": 2279
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016546012703998138,
+ "loss": 1.1094,
+ "step": 2280
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016543157714114673,
+ "loss": 1.3914,
+ "step": 2281
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016540301791319645,
+ "loss": 1.0402,
+ "step": 2282
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016537444936020246,
+ "loss": 0.9815,
+ "step": 2283
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.000165345871486238,
+ "loss": 0.9722,
+ "step": 2284
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016531728429537766,
+ "loss": 0.919,
+ "step": 2285
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016528868779169738,
+ "loss": 1.1242,
+ "step": 2286
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016526008197927436,
+ "loss": 1.1794,
+ "step": 2287
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016523146686218718,
+ "loss": 1.434,
+ "step": 2288
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016520284244451574,
+ "loss": 0.8463,
+ "step": 2289
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016517420873034123,
+ "loss": 1.1736,
+ "step": 2290
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.0001651455657237462,
+ "loss": 1.0431,
+ "step": 2291
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016511691342881453,
+ "loss": 1.2796,
+ "step": 2292
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650882518496314,
+ "loss": 1.0578,
+ "step": 2293
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016505958099028334,
+ "loss": 1.3914,
+ "step": 2294
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650309008548582,
+ "loss": 1.0046,
+ "step": 2295
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650022114474451,
+ "loss": 1.0246,
+ "step": 2296
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016497351277213458,
+ "loss": 1.2789,
+ "step": 2297
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016494480483301836,
+ "loss": 1.0036,
+ "step": 2298
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016491608763418968,
+ "loss": 0.886,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "eval_loss": 2.3017475605010986,
+ "eval_runtime": 283.8846,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648873611797429,
+ "loss": 1.3953,
+ "step": 2300
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648586254737738,
+ "loss": 0.6972,
+ "step": 2301
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016482988052037947,
+ "loss": 1.2311,
+ "step": 2302
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016480112632365833,
+ "loss": 1.327,
+ "step": 2303
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647723628877101,
+ "loss": 0.9534,
+ "step": 2304
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647435902166358,
+ "loss": 0.9164,
+ "step": 2305
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647148083145378,
+ "loss": 1.1038,
+ "step": 2306
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016468601718551976,
+ "loss": 1.0444,
+ "step": 2307
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016465721683368666,
+ "loss": 1.2635,
+ "step": 2308
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016462840726314486,
+ "loss": 1.1647,
+ "step": 2309
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016459958847800187,
+ "loss": 1.3617,
+ "step": 2310
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016457076048236675,
+ "loss": 1.2355,
+ "step": 2311
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016454192328034962,
+ "loss": 0.9989,
+ "step": 2312
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016451307687606213,
+ "loss": 1.1218,
+ "step": 2313
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016448422127361706,
+ "loss": 0.8967,
+ "step": 2314
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644553564771287,
+ "loss": 1.159,
+ "step": 2315
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644264824907124,
+ "loss": 1.5901,
+ "step": 2316
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001643975993184851,
+ "loss": 0.979,
+ "step": 2317
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016436870696456482,
+ "loss": 0.8561,
+ "step": 2318
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016433980543307107,
+ "loss": 0.9485,
+ "step": 2319
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016431089472812444,
+ "loss": 0.7736,
+ "step": 2320
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016428197485384707,
+ "loss": 1.2546,
+ "step": 2321
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016425304581436226,
+ "loss": 0.9534,
+ "step": 2322
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001642241076137947,
+ "loss": 0.8182,
+ "step": 2323
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641951602562703,
+ "loss": 1.1107,
+ "step": 2324
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641662037459164,
+ "loss": 1.0628,
+ "step": 2325
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016413723808686147,
+ "loss": 1.6261,
+ "step": 2326
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001641082632832354,
+ "loss": 1.0286,
+ "step": 2327
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001640792793391694,
+ "loss": 0.5732,
+ "step": 2328
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016405028625879594,
+ "loss": 1.0932,
+ "step": 2329
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016402128404624882,
+ "loss": 1.2585,
+ "step": 2330
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016399227270566308,
+ "loss": 0.8788,
+ "step": 2331
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639632522411751,
+ "loss": 1.1397,
+ "step": 2332
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016393422265692262,
+ "loss": 1.3517,
+ "step": 2333
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639051839570446,
+ "loss": 1.1346,
+ "step": 2334
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016387613614568126,
+ "loss": 0.9594,
+ "step": 2335
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001638470792269743,
+ "loss": 1.0674,
+ "step": 2336
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016381801320506653,
+ "loss": 0.9123,
+ "step": 2337
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016378893808410215,
+ "loss": 1.1909,
+ "step": 2338
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016375985386822664,
+ "loss": 1.0474,
+ "step": 2339
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016373076056158675,
+ "loss": 0.8844,
+ "step": 2340
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001637016581683306,
+ "loss": 1.1606,
+ "step": 2341
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016367254669260749,
+ "loss": 0.6206,
+ "step": 2342
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016364342613856816,
+ "loss": 0.7225,
+ "step": 2343
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016361429651036446,
+ "loss": 1.1782,
+ "step": 2344
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016358515781214977,
+ "loss": 1.0911,
+ "step": 2345
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016355601004807856,
+ "loss": 1.2727,
+ "step": 2346
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016352685322230663,
+ "loss": 0.8294,
+ "step": 2347
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016349768733899117,
+ "loss": 1.1661,
+ "step": 2348
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016346851240229057,
+ "loss": 0.8267,
+ "step": 2349
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016343932841636456,
+ "loss": 1.2873,
+ "step": 2350
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016341013538537412,
+ "loss": 1.2459,
+ "step": 2351
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016338093331348156,
+ "loss": 0.8939,
+ "step": 2352
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016335172220485042,
+ "loss": 1.024,
+ "step": 2353
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.0001633225020636456,
+ "loss": 0.9981,
+ "step": 2354
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016329327289403325,
+ "loss": 1.331,
+ "step": 2355
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016326403470018084,
+ "loss": 0.7446,
+ "step": 2356
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016323478748625703,
+ "loss": 1.1931,
+ "step": 2357
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016320553125643187,
+ "loss": 1.1287,
+ "step": 2358
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016317626601487667,
+ "loss": 1.109,
+ "step": 2359
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016314699176576402,
+ "loss": 0.9946,
+ "step": 2360
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016311770851326778,
+ "loss": 0.8347,
+ "step": 2361
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016308841626156307,
+ "loss": 0.9214,
+ "step": 2362
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.0001630591150148264,
+ "loss": 0.5907,
+ "step": 2363
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016302980477723539,
+ "loss": 1.2412,
+ "step": 2364
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016300048555296915,
+ "loss": 1.2908,
+ "step": 2365
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016297115734620788,
+ "loss": 1.2345,
+ "step": 2366
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016294182016113315,
+ "loss": 1.0418,
+ "step": 2367
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016291247400192785,
+ "loss": 1.1457,
+ "step": 2368
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016288311887277608,
+ "loss": 1.2529,
+ "step": 2369
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016285375477786322,
+ "loss": 1.0013,
+ "step": 2370
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016282438172137597,
+ "loss": 0.943,
+ "step": 2371
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016279499970750226,
+ "loss": 0.7009,
+ "step": 2372
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016276560874043137,
+ "loss": 0.9408,
+ "step": 2373
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627362088243538,
+ "loss": 1.1788,
+ "step": 2374
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627067999634613,
+ "loss": 0.8106,
+ "step": 2375
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016267738216194696,
+ "loss": 1.1695,
+ "step": 2376
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001626479554240051,
+ "loss": 0.9209,
+ "step": 2377
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016261851975383137,
+ "loss": 0.9911,
+ "step": 2378
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016258907515562262,
+ "loss": 1.3819,
+ "step": 2379
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162559621633577,
+ "loss": 0.8926,
+ "step": 2380
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162530159191894,
+ "loss": 1.0896,
+ "step": 2381
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016250068783477424,
+ "loss": 0.8403,
+ "step": 2382
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016247120756641972,
+ "loss": 0.7976,
+ "step": 2383
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001624417183910337,
+ "loss": 0.8881,
+ "step": 2384
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001624122203128207,
+ "loss": 0.8302,
+ "step": 2385
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623827133359865,
+ "loss": 1.3312,
+ "step": 2386
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623531974647381,
+ "loss": 1.003,
+ "step": 2387
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623236727032839,
+ "loss": 0.9487,
+ "step": 2388
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016229413905583342,
+ "loss": 1.2259,
+ "step": 2389
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016226459652659753,
+ "loss": 0.9327,
+ "step": 2390
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016223504511978838,
+ "loss": 0.7336,
+ "step": 2391
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016220548483961934,
+ "loss": 1.0454,
+ "step": 2392
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016217591569030505,
+ "loss": 1.3371,
+ "step": 2393
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016214633767606143,
+ "loss": 1.0814,
+ "step": 2394
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016211675080110566,
+ "loss": 1.2274,
+ "step": 2395
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001620871550696562,
+ "loss": 0.9775,
+ "step": 2396
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016205755048593273,
+ "loss": 1.0323,
+ "step": 2397
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016202793705415622,
+ "loss": 1.5101,
+ "step": 2398
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016199831477854893,
+ "loss": 0.8118,
+ "step": 2399
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001619686836633343,
+ "loss": 1.0233,
+ "step": 2400
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016193904371273715,
+ "loss": 0.9038,
+ "step": 2401
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016190939493098344,
+ "loss": 0.875,
+ "step": 2402
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016187973732230038,
+ "loss": 1.3274,
+ "step": 2403
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016185007089091665,
+ "loss": 1.081,
+ "step": 2404
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016182039564106192,
+ "loss": 1.0841,
+ "step": 2405
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016179071157696728,
+ "loss": 1.3208,
+ "step": 2406
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.000161761018702865,
+ "loss": 1.1854,
+ "step": 2407
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617313170229887,
+ "loss": 1.0651,
+ "step": 2408
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617016065415731,
+ "loss": 1.1398,
+ "step": 2409
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016167188726285434,
+ "loss": 1.2778,
+ "step": 2410
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016164215919106968,
+ "loss": 1.6758,
+ "step": 2411
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.0001616124223304577,
+ "loss": 0.8341,
+ "step": 2412
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016158267668525832,
+ "loss": 0.9513,
+ "step": 2413
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016155292225971253,
+ "loss": 0.9617,
+ "step": 2414
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016152315905806268,
+ "loss": 0.8664,
+ "step": 2415
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016149338708455237,
+ "loss": 1.331,
+ "step": 2416
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016146360634342643,
+ "loss": 1.4212,
+ "step": 2417
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016143381683893094,
+ "loss": 1.2126,
+ "step": 2418
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016140401857531322,
+ "loss": 0.934,
+ "step": 2419
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016137421155682183,
+ "loss": 1.2417,
+ "step": 2420
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001613443957877067,
+ "loss": 1.637,
+ "step": 2421
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016131457127221881,
+ "loss": 1.1456,
+ "step": 2422
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016128473801461053,
+ "loss": 0.9402,
+ "step": 2423
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612548960191354,
+ "loss": 1.3797,
+ "step": 2424
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612250452900483,
+ "loss": 0.8191,
+ "step": 2425
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001611951858316052,
+ "loss": 1.1725,
+ "step": 2426
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016116531764806346,
+ "loss": 1.5701,
+ "step": 2427
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016113544074368164,
+ "loss": 1.0591,
+ "step": 2428
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016110555512271953,
+ "loss": 1.03,
+ "step": 2429
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.0001610756607894382,
+ "loss": 1.1829,
+ "step": 2430
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016104575774809985,
+ "loss": 1.2222,
+ "step": 2431
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016101584600296804,
+ "loss": 1.1537,
+ "step": 2432
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016098592555830753,
+ "loss": 1.0973,
+ "step": 2433
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016095599641838436,
+ "loss": 1.0793,
+ "step": 2434
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016092605858746573,
+ "loss": 1.3484,
+ "step": 2435
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608961120698201,
+ "loss": 1.1689,
+ "step": 2436
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016086615686971726,
+ "loss": 1.0864,
+ "step": 2437
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016083619299142813,
+ "loss": 1.2451,
+ "step": 2438
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608062204392249,
+ "loss": 0.9593,
+ "step": 2439
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016077623921738102,
+ "loss": 0.9816,
+ "step": 2440
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016074624933017112,
+ "loss": 1.0845,
+ "step": 2441
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016071625078187114,
+ "loss": 0.9875,
+ "step": 2442
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001606862435767582,
+ "loss": 0.8758,
+ "step": 2443
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016065622771911067,
+ "loss": 0.9499,
+ "step": 2444
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016062620321320823,
+ "loss": 1.1133,
+ "step": 2445
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605961700633316,
+ "loss": 0.7228,
+ "step": 2446
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016056612827376293,
+ "loss": 1.2297,
+ "step": 2447
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605360778487855,
+ "loss": 1.0251,
+ "step": 2448
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016050601879268386,
+ "loss": 0.8097,
+ "step": 2449
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016047595110974376,
+ "loss": 0.9872,
+ "step": 2450
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001604458748042522,
+ "loss": 1.1119,
+ "step": 2451
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.0001604157898804974,
+ "loss": 0.8256,
+ "step": 2452
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016038569634276882,
+ "loss": 0.9036,
+ "step": 2453
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016035559419535716,
+ "loss": 1.1173,
+ "step": 2454
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016032548344255428,
+ "loss": 1.3173,
+ "step": 2455
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016029536408865337,
+ "loss": 0.717,
+ "step": 2456
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016026523613794878,
+ "loss": 0.9806,
+ "step": 2457
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016023509959473605,
+ "loss": 1.1509,
+ "step": 2458
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016020495446331207,
+ "loss": 1.0454,
+ "step": 2459
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601748007479748,
+ "loss": 1.183,
+ "step": 2460
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601446384530236,
+ "loss": 1.2611,
+ "step": 2461
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016011446758275888,
+ "loss": 1.0377,
+ "step": 2462
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016008428814148236,
+ "loss": 1.2111,
+ "step": 2463
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016005410013349698,
+ "loss": 1.0952,
+ "step": 2464
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016002390356310685,
+ "loss": 0.7589,
+ "step": 2465
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015999369843461742,
+ "loss": 0.8543,
+ "step": 2466
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015996348475233525,
+ "loss": 1.1509,
+ "step": 2467
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001599332625205681,
+ "loss": 1.287,
+ "step": 2468
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015990303174362512,
+ "loss": 1.0401,
+ "step": 2469
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001598727924258164,
+ "loss": 1.0247,
+ "step": 2470
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015984254457145354,
+ "loss": 1.1537,
+ "step": 2471
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015981228818484917,
+ "loss": 0.9606,
+ "step": 2472
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001597820232703172,
+ "loss": 0.8709,
+ "step": 2473
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015975174983217275,
+ "loss": 1.2827,
+ "step": 2474
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015972146787473213,
+ "loss": 0.8057,
+ "step": 2475
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001596911774023129,
+ "loss": 1.0857,
+ "step": 2476
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015966087841923386,
+ "loss": 1.1731,
+ "step": 2477
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001596305709298149,
+ "loss": 0.8871,
+ "step": 2478
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015960025493837727,
+ "loss": 1.0671,
+ "step": 2479
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015956993044924334,
+ "loss": 1.3735,
+ "step": 2480
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015953959746673675,
+ "loss": 1.4655,
+ "step": 2481
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015950925599518228,
+ "loss": 1.3975,
+ "step": 2482
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015947890603890602,
+ "loss": 0.9468,
+ "step": 2483
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001594485476022352,
+ "loss": 0.9976,
+ "step": 2484
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015941818068949818,
+ "loss": 0.6732,
+ "step": 2485
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015938780530502474,
+ "loss": 0.9848,
+ "step": 2486
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015935742145314568,
+ "loss": 1.2441,
+ "step": 2487
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001593270291381931,
+ "loss": 0.9631,
+ "step": 2488
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015929662836450029,
+ "loss": 0.8868,
+ "step": 2489
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592662191364017,
+ "loss": 0.9063,
+ "step": 2490
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015923580145823303,
+ "loss": 0.6886,
+ "step": 2491
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592053753343312,
+ "loss": 1.0702,
+ "step": 2492
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591749407690343,
+ "loss": 1.3879,
+ "step": 2493
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015914449776668167,
+ "loss": 1.1048,
+ "step": 2494
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591140463316137,
+ "loss": 0.9921,
+ "step": 2495
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015908358646817225,
+ "loss": 1.3042,
+ "step": 2496
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015905311818070015,
+ "loss": 0.8413,
+ "step": 2497
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015902264147354153,
+ "loss": 1.5201,
+ "step": 2498
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589921563510417,
+ "loss": 1.0727,
+ "step": 2499
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589616628175472,
+ "loss": 1.0439,
+ "step": 2500
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589311608774057,
+ "loss": 1.2308,
+ "step": 2501
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015890065053496613,
+ "loss": 1.1155,
+ "step": 2502
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015887013179457862,
+ "loss": 1.3345,
+ "step": 2503
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015883960466059444,
+ "loss": 0.9551,
+ "step": 2504
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001588090691373661,
+ "loss": 1.0713,
+ "step": 2505
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015877852522924732,
+ "loss": 1.299,
+ "step": 2506
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.000158747972940593,
+ "loss": 0.8535,
+ "step": 2507
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001587174122757592,
+ "loss": 0.9924,
+ "step": 2508
+ },
+ {
+ "epoch": 2.98,
+ "eval_loss": 2.328662395477295,
+ "eval_runtime": 283.7765,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2508
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001586868432391032,
+ "loss": 1.0512,
+ "step": 2509
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015865626583498355,
+ "loss": 1.2775,
+ "step": 2510
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015862568006775983,
+ "loss": 0.7054,
+ "step": 2511
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015859508594179294,
+ "loss": 0.8524,
+ "step": 2512
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015856448346144496,
+ "loss": 0.9871,
+ "step": 2513
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015853387263107909,
+ "loss": 0.8642,
+ "step": 2514
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015850325345505975,
+ "loss": 1.1789,
+ "step": 2515
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015847262593775266,
+ "loss": 1.2765,
+ "step": 2516
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015844199008352458,
+ "loss": 0.6272,
+ "step": 2517
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015841134589674352,
+ "loss": 1.3037,
+ "step": 2518
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015838069338177863,
+ "loss": 1.054,
+ "step": 2519
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015835003254300039,
+ "loss": 1.1942,
+ "step": 2520
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015831936338478025,
+ "loss": 0.8866,
+ "step": 2521
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015828868591149104,
+ "loss": 1.1444,
+ "step": 2522
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015825800012750666,
+ "loss": 0.8597,
+ "step": 2523
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001582273060372023,
+ "loss": 0.7731,
+ "step": 2524
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015819660364495416,
+ "loss": 1.1953,
+ "step": 2525
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001581658929551398,
+ "loss": 1.3946,
+ "step": 2526
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015813517397213791,
+ "loss": 1.0173,
+ "step": 2527
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015810444670032831,
+ "loss": 1.1762,
+ "step": 2528
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015807371114409202,
+ "loss": 0.7283,
+ "step": 2529
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015804296730781135,
+ "loss": 1.1515,
+ "step": 2530
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015801221519586958,
+ "loss": 0.9389,
+ "step": 2531
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001579814548126514,
+ "loss": 1.1869,
+ "step": 2532
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015795068616254247,
+ "loss": 1.2957,
+ "step": 2533
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015791990924992981,
+ "loss": 1.0514,
+ "step": 2534
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015788912407920148,
+ "loss": 0.6762,
+ "step": 2535
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015785833065474683,
+ "loss": 0.4121,
+ "step": 2536
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015782752898095627,
+ "loss": 0.4532,
+ "step": 2537
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001577967190622215,
+ "loss": 0.4847,
+ "step": 2538
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001577659009029353,
+ "loss": 0.8313,
+ "step": 2539
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015773507450749172,
+ "loss": 0.5304,
+ "step": 2540
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015770423988028588,
+ "loss": 0.6003,
+ "step": 2541
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015767339702571414,
+ "loss": 0.3988,
+ "step": 2542
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015764254594817398,
+ "loss": 0.6133,
+ "step": 2543
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001576116866520642,
+ "loss": 0.4858,
+ "step": 2544
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015758081914178456,
+ "loss": 0.3691,
+ "step": 2545
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001575499434217361,
+ "loss": 0.5441,
+ "step": 2546
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001575190594963211,
+ "loss": 0.4605,
+ "step": 2547
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015748816736994284,
+ "loss": 0.3681,
+ "step": 2548
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015745726704700593,
+ "loss": 0.4113,
+ "step": 2549
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015742635853191608,
+ "loss": 0.5233,
+ "step": 2550
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015739544182908014,
+ "loss": 0.356,
+ "step": 2551
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015736451694290616,
+ "loss": 0.4105,
+ "step": 2552
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015733358387780337,
+ "loss": 0.4451,
+ "step": 2553
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015730264263818212,
+ "loss": 0.5023,
+ "step": 2554
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.000157271693228454,
+ "loss": 0.3671,
+ "step": 2555
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001572407356530317,
+ "loss": 0.7077,
+ "step": 2556
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015720976991632913,
+ "loss": 0.4439,
+ "step": 2557
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015717879602276122,
+ "loss": 0.5961,
+ "step": 2558
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001571478139767443,
+ "loss": 0.4269,
+ "step": 2559
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015711682378269565,
+ "loss": 0.3427,
+ "step": 2560
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015708582544503386,
+ "loss": 0.5736,
+ "step": 2561
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015705481896817854,
+ "loss": 0.3707,
+ "step": 2562
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001570238043565506,
+ "loss": 0.4076,
+ "step": 2563
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.000156992781614572,
+ "loss": 0.6514,
+ "step": 2564
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015696175074666598,
+ "loss": 0.4012,
+ "step": 2565
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.0001569307117572568,
+ "loss": 0.3492,
+ "step": 2566
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015689966465076992,
+ "loss": 0.4121,
+ "step": 2567
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015686860943163206,
+ "loss": 0.5769,
+ "step": 2568
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015683754610427094,
+ "loss": 0.4872,
+ "step": 2569
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015680647467311557,
+ "loss": 0.5518,
+ "step": 2570
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015677539514259608,
+ "loss": 0.411,
+ "step": 2571
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015674430751714361,
+ "loss": 0.3443,
+ "step": 2572
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015671321180119074,
+ "loss": 0.3706,
+ "step": 2573
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0001566821079991709,
+ "loss": 0.6168,
+ "step": 2574
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0001566509961155189,
+ "loss": 0.3726,
+ "step": 2575
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015661987615467058,
+ "loss": 0.3976,
+ "step": 2576
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015658874812106297,
+ "loss": 0.3697,
+ "step": 2577
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015655761201913425,
+ "loss": 0.2759,
+ "step": 2578
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015652646785332378,
+ "loss": 0.3572,
+ "step": 2579
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.000156495315628072,
+ "loss": 0.5333,
+ "step": 2580
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015646415534782056,
+ "loss": 0.4004,
+ "step": 2581
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.0001564329870170122,
+ "loss": 0.4736,
+ "step": 2582
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015640181064009088,
+ "loss": 0.4814,
+ "step": 2583
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015637062622150168,
+ "loss": 0.3351,
+ "step": 2584
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015633943376569081,
+ "loss": 0.4497,
+ "step": 2585
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015630823327710558,
+ "loss": 0.4202,
+ "step": 2586
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015627702476019457,
+ "loss": 0.5934,
+ "step": 2587
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.0001562458082194074,
+ "loss": 0.4664,
+ "step": 2588
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015621458365919487,
+ "loss": 0.4077,
+ "step": 2589
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015618335108400893,
+ "loss": 0.5244,
+ "step": 2590
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015615211049830268,
+ "loss": 0.5042,
+ "step": 2591
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015612086190653027,
+ "loss": 0.3442,
+ "step": 2592
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015608960531314717,
+ "loss": 0.6337,
+ "step": 2593
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015605834072260984,
+ "loss": 0.3542,
+ "step": 2594
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001560270681393759,
+ "loss": 0.5113,
+ "step": 2595
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001559957875679042,
+ "loss": 0.4346,
+ "step": 2596
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015596449901265463,
+ "loss": 0.5231,
+ "step": 2597
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015593320247808822,
+ "loss": 0.5193,
+ "step": 2598
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001559018979686673,
+ "loss": 0.3575,
+ "step": 2599
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015587058548885505,
+ "loss": 0.6356,
+ "step": 2600
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015583926504311605,
+ "loss": 0.3313,
+ "step": 2601
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015580793663591585,
+ "loss": 0.356,
+ "step": 2602
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015577660027172127,
+ "loss": 0.5498,
+ "step": 2603
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001557452559550001,
+ "loss": 0.3973,
+ "step": 2604
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001557139036902215,
+ "loss": 0.4751,
+ "step": 2605
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015568254348185544,
+ "loss": 0.4297,
+ "step": 2606
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015565117533437335,
+ "loss": 0.4299,
+ "step": 2607
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015561979925224754,
+ "loss": 0.4651,
+ "step": 2608
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015558841523995162,
+ "loss": 0.474,
+ "step": 2609
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015555702330196023,
+ "loss": 0.4143,
+ "step": 2610
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.0001555256234427492,
+ "loss": 0.393,
+ "step": 2611
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015549421566679546,
+ "loss": 0.3738,
+ "step": 2612
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015546279997857704,
+ "loss": 0.4394,
+ "step": 2613
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.0001554313763825732,
+ "loss": 0.3702,
+ "step": 2614
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015539994488326418,
+ "loss": 0.4594,
+ "step": 2615
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015536850548513147,
+ "loss": 0.3249,
+ "step": 2616
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015533705819265764,
+ "loss": 0.3857,
+ "step": 2617
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0001553056030103264,
+ "loss": 0.3272,
+ "step": 2618
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015527413994262257,
+ "loss": 0.5204,
+ "step": 2619
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015524266899403206,
+ "loss": 0.3653,
+ "step": 2620
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.000155211190169042,
+ "loss": 0.4698,
+ "step": 2621
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0001551797034721405,
+ "loss": 0.5949,
+ "step": 2622
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015514820890781693,
+ "loss": 0.4074,
+ "step": 2623
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015511670648056178,
+ "loss": 0.3586,
+ "step": 2624
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.0001550851961948665,
+ "loss": 0.6494,
+ "step": 2625
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015505367805522383,
+ "loss": 0.4914,
+ "step": 2626
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.0001550221520661276,
+ "loss": 0.4594,
+ "step": 2627
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015499061823207266,
+ "loss": 0.4102,
+ "step": 2628
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015495907655755506,
+ "loss": 0.4229,
+ "step": 2629
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.000154927527047072,
+ "loss": 0.7218,
+ "step": 2630
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001548959697051217,
+ "loss": 0.6929,
+ "step": 2631
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015486440453620358,
+ "loss": 0.3628,
+ "step": 2632
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015483283154481815,
+ "loss": 0.4433,
+ "step": 2633
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015480125073546704,
+ "loss": 0.3912,
+ "step": 2634
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001547696621126529,
+ "loss": 0.3682,
+ "step": 2635
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015473806568087968,
+ "loss": 0.354,
+ "step": 2636
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001547064614446523,
+ "loss": 0.4789,
+ "step": 2637
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001546748494084768,
+ "loss": 0.382,
+ "step": 2638
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015464322957686041,
+ "loss": 0.4954,
+ "step": 2639
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015461160195431148,
+ "loss": 0.3273,
+ "step": 2640
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001545799665453393,
+ "loss": 0.3414,
+ "step": 2641
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015454832335445447,
+ "loss": 0.5479,
+ "step": 2642
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001545166723861686,
+ "loss": 0.4963,
+ "step": 2643
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015448501364499445,
+ "loss": 0.5547,
+ "step": 2644
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001544533471354458,
+ "loss": 0.4637,
+ "step": 2645
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015442167286203767,
+ "loss": 0.4248,
+ "step": 2646
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015438999082928608,
+ "loss": 0.4213,
+ "step": 2647
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015435830104170822,
+ "loss": 0.3734,
+ "step": 2648
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015432660350382234,
+ "loss": 0.4627,
+ "step": 2649
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.0001542948982201479,
+ "loss": 0.3422,
+ "step": 2650
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015426318519520525,
+ "loss": 0.4409,
+ "step": 2651
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015423146443351607,
+ "loss": 0.3717,
+ "step": 2652
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015419973593960298,
+ "loss": 0.4349,
+ "step": 2653
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015416799971798985,
+ "loss": 0.5349,
+ "step": 2654
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.0001541362557732015,
+ "loss": 0.4511,
+ "step": 2655
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.000154104504109764,
+ "loss": 0.5997,
+ "step": 2656
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015407274473220434,
+ "loss": 0.661,
+ "step": 2657
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015404097764505083,
+ "loss": 0.3456,
+ "step": 2658
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015400920285283268,
+ "loss": 0.3416,
+ "step": 2659
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015397742036008034,
+ "loss": 0.4707,
+ "step": 2660
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015394563017132526,
+ "loss": 0.3221,
+ "step": 2661
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015391383229110007,
+ "loss": 0.6108,
+ "step": 2662
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015388202672393834,
+ "loss": 0.5504,
+ "step": 2663
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015385021347437498,
+ "loss": 0.3973,
+ "step": 2664
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015381839254694583,
+ "loss": 0.5149,
+ "step": 2665
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015378656394618787,
+ "loss": 0.5853,
+ "step": 2666
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0001537547276766391,
+ "loss": 0.517,
+ "step": 2667
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015372288374283875,
+ "loss": 0.5485,
+ "step": 2668
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015369103214932703,
+ "loss": 0.4907,
+ "step": 2669
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0001536591729006453,
+ "loss": 0.3169,
+ "step": 2670
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015362730600133596,
+ "loss": 0.5431,
+ "step": 2671
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015359543145594258,
+ "loss": 0.2586,
+ "step": 2672
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015356354926900979,
+ "loss": 0.5251,
+ "step": 2673
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015353165944508325,
+ "loss": 0.4104,
+ "step": 2674
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015349976198870973,
+ "loss": 0.4825,
+ "step": 2675
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015346785690443718,
+ "loss": 0.5274,
+ "step": 2676
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.0001534359441968145,
+ "loss": 0.3878,
+ "step": 2677
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.0001534040238703918,
+ "loss": 0.5132,
+ "step": 2678
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015337209592972023,
+ "loss": 0.5145,
+ "step": 2679
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015334016037935196,
+ "loss": 0.5548,
+ "step": 2680
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015330821722384037,
+ "loss": 0.7494,
+ "step": 2681
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015327626646773976,
+ "loss": 0.5569,
+ "step": 2682
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015324430811560573,
+ "loss": 0.2622,
+ "step": 2683
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001532123421719948,
+ "loss": 0.3749,
+ "step": 2684
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015318036864146457,
+ "loss": 0.3959,
+ "step": 2685
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001531483875285738,
+ "loss": 0.5243,
+ "step": 2686
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001531163988378823,
+ "loss": 0.3115,
+ "step": 2687
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015308440257395093,
+ "loss": 0.2385,
+ "step": 2688
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015305239874134174,
+ "loss": 0.4431,
+ "step": 2689
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.0001530203873446177,
+ "loss": 0.378,
+ "step": 2690
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015298836838834298,
+ "loss": 0.4521,
+ "step": 2691
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015295634187708279,
+ "loss": 0.6309,
+ "step": 2692
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015292430781540335,
+ "loss": 0.4355,
+ "step": 2693
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015289226620787208,
+ "loss": 0.4537,
+ "step": 2694
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.0001528602170590574,
+ "loss": 0.4305,
+ "step": 2695
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015282816037352878,
+ "loss": 0.5355,
+ "step": 2696
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015279609615585687,
+ "loss": 0.5243,
+ "step": 2697
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001527640244106133,
+ "loss": 0.5334,
+ "step": 2698
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015273194514237078,
+ "loss": 0.5409,
+ "step": 2699
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001526998583557031,
+ "loss": 0.4042,
+ "step": 2700
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015266776405518524,
+ "loss": 0.5536,
+ "step": 2701
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.000152635662245393,
+ "loss": 0.2743,
+ "step": 2702
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015260355293090353,
+ "loss": 0.4762,
+ "step": 2703
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015257143611629482,
+ "loss": 0.4552,
+ "step": 2704
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001525393118061461,
+ "loss": 0.5395,
+ "step": 2705
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001525071800050375,
+ "loss": 0.4297,
+ "step": 2706
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015247504071755046,
+ "loss": 0.364,
+ "step": 2707
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015244289394826722,
+ "loss": 0.9499,
+ "step": 2708
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015241073970177126,
+ "loss": 0.579,
+ "step": 2709
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.000152378577982647,
+ "loss": 0.3111,
+ "step": 2710
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001523464087954801,
+ "loss": 0.3345,
+ "step": 2711
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015231423214485715,
+ "loss": 0.4628,
+ "step": 2712
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015228204803536586,
+ "loss": 0.4803,
+ "step": 2713
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001522498564715949,
+ "loss": 0.4164,
+ "step": 2714
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015221765745813417,
+ "loss": 0.6468,
+ "step": 2715
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015218545099957449,
+ "loss": 0.4495,
+ "step": 2716
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015215323710050785,
+ "loss": 0.4184,
+ "step": 2717
+ },
+ {
+ "epoch": 3.22,
+ "eval_loss": 2.9206559658050537,
+ "eval_runtime": 283.9002,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2717
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015212101576552722,
+ "loss": 0.4215,
+ "step": 2718
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015208878699922668,
+ "loss": 1.4488,
+ "step": 2719
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.0001520565508062013,
+ "loss": 0.4449,
+ "step": 2720
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.0001520243071910473,
+ "loss": 0.2853,
+ "step": 2721
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015199205615836191,
+ "loss": 0.4572,
+ "step": 2722
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015195979771274342,
+ "loss": 0.4436,
+ "step": 2723
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.0001519275318587912,
+ "loss": 0.38,
+ "step": 2724
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015189525860110563,
+ "loss": 0.4956,
+ "step": 2725
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015186297794428816,
+ "loss": 0.8514,
+ "step": 2726
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015183068989294132,
+ "loss": 0.4518,
+ "step": 2727
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015179839445166873,
+ "loss": 0.5581,
+ "step": 2728
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015176609162507497,
+ "loss": 0.5828,
+ "step": 2729
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015173378141776568,
+ "loss": 0.4109,
+ "step": 2730
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015170146383434768,
+ "loss": 0.5762,
+ "step": 2731
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015166913887942868,
+ "loss": 0.4502,
+ "step": 2732
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015163680655761757,
+ "loss": 0.3736,
+ "step": 2733
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015160446687352417,
+ "loss": 0.3771,
+ "step": 2734
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015157211983175947,
+ "loss": 0.469,
+ "step": 2735
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015153976543693542,
+ "loss": 0.665,
+ "step": 2736
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015150740369366508,
+ "loss": 0.3495,
+ "step": 2737
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.0001514750346065625,
+ "loss": 0.4513,
+ "step": 2738
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.0001514426581802428,
+ "loss": 0.4571,
+ "step": 2739
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015141027441932216,
+ "loss": 0.4197,
+ "step": 2740
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015137788332841785,
+ "loss": 0.3396,
+ "step": 2741
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015134548491214806,
+ "loss": 0.3547,
+ "step": 2742
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015131307917513214,
+ "loss": 0.3073,
+ "step": 2743
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015128066612199044,
+ "loss": 0.7091,
+ "step": 2744
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015124824575734428,
+ "loss": 0.2845,
+ "step": 2745
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015121581808581622,
+ "loss": 0.2903,
+ "step": 2746
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015118338311202964,
+ "loss": 0.4065,
+ "step": 2747
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015115094084060916,
+ "loss": 0.6152,
+ "step": 2748
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015111849127618022,
+ "loss": 0.5352,
+ "step": 2749
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.0001510860344233695,
+ "loss": 0.414,
+ "step": 2750
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015105357028680457,
+ "loss": 0.4756,
+ "step": 2751
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015102109887111422,
+ "loss": 0.4644,
+ "step": 2752
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015098862018092808,
+ "loss": 0.4231,
+ "step": 2753
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015095613422087692,
+ "loss": 0.4617,
+ "step": 2754
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.0001509236409955925,
+ "loss": 0.5876,
+ "step": 2755
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001508911405097077,
+ "loss": 0.5696,
+ "step": 2756
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015085863276785637,
+ "loss": 0.3826,
+ "step": 2757
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001508261177746734,
+ "loss": 0.4338,
+ "step": 2758
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001507935955347947,
+ "loss": 0.3546,
+ "step": 2759
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015076106605285724,
+ "loss": 0.413,
+ "step": 2760
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.000150728529333499,
+ "loss": 0.3954,
+ "step": 2761
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015069598538135906,
+ "loss": 0.5214,
+ "step": 2762
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001506634342010774,
+ "loss": 0.5239,
+ "step": 2763
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015063087579729519,
+ "loss": 0.8681,
+ "step": 2764
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015059831017465449,
+ "loss": 0.4616,
+ "step": 2765
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015056573733779848,
+ "loss": 0.4721,
+ "step": 2766
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015053315729137128,
+ "loss": 0.4449,
+ "step": 2767
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.0001505005700400182,
+ "loss": 0.569,
+ "step": 2768
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015046797558838535,
+ "loss": 0.4926,
+ "step": 2769
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015043537394112007,
+ "loss": 0.462,
+ "step": 2770
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015040276510287063,
+ "loss": 0.6983,
+ "step": 2771
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015037014907828632,
+ "loss": 0.4644,
+ "step": 2772
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001503375258720175,
+ "loss": 0.5924,
+ "step": 2773
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015030489548871544,
+ "loss": 0.5282,
+ "step": 2774
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015027225793303264,
+ "loss": 0.4757,
+ "step": 2775
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015023961320962247,
+ "loss": 0.5014,
+ "step": 2776
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001502069613231393,
+ "loss": 0.3455,
+ "step": 2777
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015017430227823864,
+ "loss": 0.4525,
+ "step": 2778
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001501416360795769,
+ "loss": 0.51,
+ "step": 2779
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015010896273181165,
+ "loss": 0.3766,
+ "step": 2780
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001500762822396013,
+ "loss": 0.3162,
+ "step": 2781
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00015004359460760546,
+ "loss": 0.406,
+ "step": 2782
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00015001089984048463,
+ "loss": 0.4671,
+ "step": 2783
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014997819794290034,
+ "loss": 0.4299,
+ "step": 2784
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014994548891951524,
+ "loss": 0.5494,
+ "step": 2785
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001499127727749929,
+ "loss": 0.351,
+ "step": 2786
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014988004951399785,
+ "loss": 0.3807,
+ "step": 2787
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014984731914119586,
+ "loss": 0.3999,
+ "step": 2788
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001498145816612534,
+ "loss": 0.7609,
+ "step": 2789
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014978183707883827,
+ "loss": 0.4466,
+ "step": 2790
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014974908539861905,
+ "loss": 0.592,
+ "step": 2791
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014971632662526545,
+ "loss": 0.4786,
+ "step": 2792
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014968356076344814,
+ "loss": 0.4087,
+ "step": 2793
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.0001496507878178388,
+ "loss": 0.3811,
+ "step": 2794
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014961800779311014,
+ "loss": 0.4091,
+ "step": 2795
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014958522069393593,
+ "loss": 0.6861,
+ "step": 2796
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014955242652499084,
+ "loss": 0.3346,
+ "step": 2797
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014951962529095064,
+ "loss": 0.5417,
+ "step": 2798
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.000149486816996492,
+ "loss": 0.7325,
+ "step": 2799
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014945400164629278,
+ "loss": 0.5007,
+ "step": 2800
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014942117924503164,
+ "loss": 0.4217,
+ "step": 2801
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014938834979738835,
+ "loss": 0.5265,
+ "step": 2802
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014935551330804372,
+ "loss": 0.4376,
+ "step": 2803
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.0001493226697816795,
+ "loss": 0.5068,
+ "step": 2804
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014928981922297842,
+ "loss": 0.6248,
+ "step": 2805
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001492569616366243,
+ "loss": 0.593,
+ "step": 2806
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001492240970273019,
+ "loss": 0.6713,
+ "step": 2807
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014919122539969697,
+ "loss": 0.5736,
+ "step": 2808
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014915834675849633,
+ "loss": 0.3006,
+ "step": 2809
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014912546110838775,
+ "loss": 0.5175,
+ "step": 2810
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014909256845405998,
+ "loss": 0.52,
+ "step": 2811
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014905966880020282,
+ "loss": 0.5491,
+ "step": 2812
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014902676215150702,
+ "loss": 0.6007,
+ "step": 2813
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001489938485126644,
+ "loss": 0.6552,
+ "step": 2814
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014896092788836763,
+ "loss": 0.3624,
+ "step": 2815
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001489280002833106,
+ "loss": 0.2626,
+ "step": 2816
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014889506570218796,
+ "loss": 0.409,
+ "step": 2817
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014886212414969553,
+ "loss": 0.473,
+ "step": 2818
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014882917563052998,
+ "loss": 0.4205,
+ "step": 2819
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014879622014938915,
+ "loss": 0.4603,
+ "step": 2820
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001487632577109717,
+ "loss": 0.3522,
+ "step": 2821
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001487302883199774,
+ "loss": 0.3787,
+ "step": 2822
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014869731198110695,
+ "loss": 0.6,
+ "step": 2823
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.000148664328699062,
+ "loss": 0.4291,
+ "step": 2824
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014863133847854533,
+ "loss": 0.4358,
+ "step": 2825
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.0001485983413242606,
+ "loss": 0.4144,
+ "step": 2826
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.0001485653372409125,
+ "loss": 0.842,
+ "step": 2827
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014853232623320662,
+ "loss": 0.3398,
+ "step": 2828
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014849930830584972,
+ "loss": 0.5005,
+ "step": 2829
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014846628346354933,
+ "loss": 0.5777,
+ "step": 2830
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014843325171101413,
+ "loss": 0.3953,
+ "step": 2831
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014840021305295373,
+ "loss": 0.4056,
+ "step": 2832
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014836716749407872,
+ "loss": 0.7682,
+ "step": 2833
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.0001483341150391006,
+ "loss": 0.3208,
+ "step": 2834
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014830105569273204,
+ "loss": 0.4317,
+ "step": 2835
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014826798945968654,
+ "loss": 0.363,
+ "step": 2836
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014823491634467862,
+ "loss": 0.3784,
+ "step": 2837
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014820183635242374,
+ "loss": 0.9267,
+ "step": 2838
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.0001481687494876385,
+ "loss": 0.4245,
+ "step": 2839
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014813565575504022,
+ "loss": 0.3929,
+ "step": 2840
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014810255515934747,
+ "loss": 0.5171,
+ "step": 2841
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014806944770527958,
+ "loss": 0.5181,
+ "step": 2842
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014803633339755703,
+ "loss": 0.4765,
+ "step": 2843
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014800321224090114,
+ "loss": 0.4433,
+ "step": 2844
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014797008424003428,
+ "loss": 0.461,
+ "step": 2845
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.0001479369493996798,
+ "loss": 0.5688,
+ "step": 2846
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014790380772456197,
+ "loss": 0.4822,
+ "step": 2847
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.0001478706592194061,
+ "loss": 0.4993,
+ "step": 2848
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014783750388893842,
+ "loss": 0.3967,
+ "step": 2849
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014780434173788617,
+ "loss": 0.4708,
+ "step": 2850
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014777117277097758,
+ "loss": 0.5721,
+ "step": 2851
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014773799699294176,
+ "loss": 0.5276,
+ "step": 2852
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014770481440850891,
+ "loss": 0.4135,
+ "step": 2853
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.0001476716250224101,
+ "loss": 0.716,
+ "step": 2854
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014763842883937743,
+ "loss": 0.3663,
+ "step": 2855
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014760522586414396,
+ "loss": 0.4105,
+ "step": 2856
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014757201610144372,
+ "loss": 0.4554,
+ "step": 2857
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014753879955601163,
+ "loss": 0.4366,
+ "step": 2858
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.0001475055762325837,
+ "loss": 0.3752,
+ "step": 2859
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014747234613589685,
+ "loss": 0.3747,
+ "step": 2860
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.000147439109270689,
+ "loss": 0.5533,
+ "step": 2861
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014740586564169892,
+ "loss": 0.4962,
+ "step": 2862
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014737261525366648,
+ "loss": 0.5318,
+ "step": 2863
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014733935811133244,
+ "loss": 0.4592,
+ "step": 2864
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014730609421943855,
+ "loss": 0.429,
+ "step": 2865
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014727282358272754,
+ "loss": 0.4163,
+ "step": 2866
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014723954620594304,
+ "loss": 0.4811,
+ "step": 2867
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0001472062620938297,
+ "loss": 0.4662,
+ "step": 2868
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014717297125113311,
+ "loss": 0.531,
+ "step": 2869
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0001471396736825998,
+ "loss": 0.3233,
+ "step": 2870
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014710636939297724,
+ "loss": 0.4171,
+ "step": 2871
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.000147073058387014,
+ "loss": 0.5412,
+ "step": 2872
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014703974066945943,
+ "loss": 0.4357,
+ "step": 2873
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014700641624506392,
+ "loss": 0.3889,
+ "step": 2874
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001469730851185788,
+ "loss": 0.456,
+ "step": 2875
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014693974729475636,
+ "loss": 0.4365,
+ "step": 2876
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001469064027783499,
+ "loss": 0.3947,
+ "step": 2877
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014687305157411355,
+ "loss": 0.5718,
+ "step": 2878
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001468396936868025,
+ "loss": 0.4652,
+ "step": 2879
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014680632912117286,
+ "loss": 0.4242,
+ "step": 2880
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.0001467729578819817,
+ "loss": 0.5045,
+ "step": 2881
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014673957997398695,
+ "loss": 0.4098,
+ "step": 2882
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014670619540194766,
+ "loss": 0.597,
+ "step": 2883
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014667280417062374,
+ "loss": 0.5208,
+ "step": 2884
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014663940628477598,
+ "loss": 0.4881,
+ "step": 2885
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014660600174916627,
+ "loss": 0.5234,
+ "step": 2886
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.0001465725905685573,
+ "loss": 0.439,
+ "step": 2887
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014653917274771284,
+ "loss": 0.4498,
+ "step": 2888
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014650574829139747,
+ "loss": 0.4837,
+ "step": 2889
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014647231720437686,
+ "loss": 0.4232,
+ "step": 2890
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014643887949141753,
+ "loss": 0.4467,
+ "step": 2891
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014640543515728695,
+ "loss": 0.3566,
+ "step": 2892
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014637198420675354,
+ "loss": 0.3888,
+ "step": 2893
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014633852664458673,
+ "loss": 0.326,
+ "step": 2894
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.0001463050624755568,
+ "loss": 0.3608,
+ "step": 2895
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014627159170443502,
+ "loss": 0.5326,
+ "step": 2896
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014623811433599359,
+ "loss": 0.3171,
+ "step": 2897
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014620463037500568,
+ "loss": 0.4619,
+ "step": 2898
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014617113982624526,
+ "loss": 0.7739,
+ "step": 2899
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014613764269448751,
+ "loss": 0.4327,
+ "step": 2900
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.0001461041389845083,
+ "loss": 0.6078,
+ "step": 2901
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014607062870108456,
+ "loss": 0.3863,
+ "step": 2902
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014603711184899408,
+ "loss": 0.4787,
+ "step": 2903
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014600358843301568,
+ "loss": 0.2997,
+ "step": 2904
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014597005845792905,
+ "loss": 0.3657,
+ "step": 2905
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014593652192851486,
+ "loss": 0.334,
+ "step": 2906
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014590297884955463,
+ "loss": 0.6809,
+ "step": 2907
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.0001458694292258309,
+ "loss": 0.4739,
+ "step": 2908
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014583587306212713,
+ "loss": 0.4139,
+ "step": 2909
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014580231036322768,
+ "loss": 0.3307,
+ "step": 2910
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014576874113391789,
+ "loss": 0.4155,
+ "step": 2911
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014573516537898394,
+ "loss": 0.4461,
+ "step": 2912
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014570158310321305,
+ "loss": 0.4775,
+ "step": 2913
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.0001456679943113933,
+ "loss": 0.344,
+ "step": 2914
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014563439900831373,
+ "loss": 0.3568,
+ "step": 2915
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014560079719876424,
+ "loss": 0.3808,
+ "step": 2916
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.0001455671888875358,
+ "loss": 0.5467,
+ "step": 2917
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014553357407942022,
+ "loss": 0.5267,
+ "step": 2918
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014549995277921015,
+ "loss": 0.4476,
+ "step": 2919
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014546632499169937,
+ "loss": 0.4463,
+ "step": 2920
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014543269072168235,
+ "loss": 0.5553,
+ "step": 2921
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014539904997395468,
+ "loss": 0.5476,
+ "step": 2922
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001453654027533128,
+ "loss": 0.4443,
+ "step": 2923
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014533174906455404,
+ "loss": 0.4353,
+ "step": 2924
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014529808891247667,
+ "loss": 0.4479,
+ "step": 2925
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014526442230187995,
+ "loss": 0.3951,
+ "step": 2926
+ },
+ {
+ "epoch": 3.47,
+ "eval_loss": 2.882225751876831,
+ "eval_runtime": 283.9462,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2926
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014523074923756394,
+ "loss": 0.679,
+ "step": 2927
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001451970697243297,
+ "loss": 0.4178,
+ "step": 2928
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001451633837669792,
+ "loss": 0.4121,
+ "step": 2929
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014512969137031538,
+ "loss": 0.3929,
+ "step": 2930
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014509599253914195,
+ "loss": 0.366,
+ "step": 2931
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.0001450622872782637,
+ "loss": 0.3528,
+ "step": 2932
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014502857559248617,
+ "loss": 0.5003,
+ "step": 2933
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014499485748661604,
+ "loss": 0.4901,
+ "step": 2934
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014496113296546067,
+ "loss": 0.4538,
+ "step": 2935
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014492740203382847,
+ "loss": 0.4549,
+ "step": 2936
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.0001448936646965288,
+ "loss": 0.5464,
+ "step": 2937
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014485992095837177,
+ "loss": 0.43,
+ "step": 2938
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014482617082416858,
+ "loss": 0.5893,
+ "step": 2939
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001447924142987312,
+ "loss": 0.4947,
+ "step": 2940
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014475865138687262,
+ "loss": 0.4903,
+ "step": 2941
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001447248820934067,
+ "loss": 0.4933,
+ "step": 2942
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014469110642314817,
+ "loss": 0.4516,
+ "step": 2943
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001446573243809127,
+ "loss": 0.469,
+ "step": 2944
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014462353597151684,
+ "loss": 0.6531,
+ "step": 2945
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014458974119977818,
+ "loss": 0.2754,
+ "step": 2946
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001445559400705151,
+ "loss": 0.5676,
+ "step": 2947
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014452213258854684,
+ "loss": 0.5903,
+ "step": 2948
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014448831875869364,
+ "loss": 0.5022,
+ "step": 2949
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.0001444544985857766,
+ "loss": 0.3509,
+ "step": 2950
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014442067207461775,
+ "loss": 0.3921,
+ "step": 2951
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014438683923004005,
+ "loss": 0.4997,
+ "step": 2952
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014435300005686728,
+ "loss": 0.6218,
+ "step": 2953
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014431915455992414,
+ "loss": 0.4097,
+ "step": 2954
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014428530274403632,
+ "loss": 0.3478,
+ "step": 2955
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014425144461403035,
+ "loss": 0.4506,
+ "step": 2956
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014421758017473362,
+ "loss": 0.4025,
+ "step": 2957
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014418370943097448,
+ "loss": 0.3838,
+ "step": 2958
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014414983238758217,
+ "loss": 0.6366,
+ "step": 2959
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014411594904938682,
+ "loss": 0.4649,
+ "step": 2960
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014408205942121943,
+ "loss": 0.3361,
+ "step": 2961
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014404816350791188,
+ "loss": 0.3692,
+ "step": 2962
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.0001440142613142971,
+ "loss": 0.6162,
+ "step": 2963
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014398035284520874,
+ "loss": 0.5935,
+ "step": 2964
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001439464381054814,
+ "loss": 0.545,
+ "step": 2965
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014391251709995061,
+ "loss": 0.4178,
+ "step": 2966
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014387858983345276,
+ "loss": 0.5552,
+ "step": 2967
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001438446563108251,
+ "loss": 0.4506,
+ "step": 2968
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014381071653690587,
+ "loss": 0.429,
+ "step": 2969
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014377677051653404,
+ "loss": 0.3897,
+ "step": 2970
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001437428182545497,
+ "loss": 0.4663,
+ "step": 2971
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014370885975579364,
+ "loss": 0.4643,
+ "step": 2972
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001436748950251076,
+ "loss": 0.5433,
+ "step": 2973
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001436409240673342,
+ "loss": 0.4967,
+ "step": 2974
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.000143606946887317,
+ "loss": 0.3717,
+ "step": 2975
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014357296348990037,
+ "loss": 0.4166,
+ "step": 2976
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001435389738799296,
+ "loss": 0.455,
+ "step": 2977
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014350497806225087,
+ "loss": 0.4603,
+ "step": 2978
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014347097604171127,
+ "loss": 0.4325,
+ "step": 2979
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001434369678231587,
+ "loss": 0.4375,
+ "step": 2980
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014340295341144202,
+ "loss": 0.4932,
+ "step": 2981
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014336893281141096,
+ "loss": 0.5264,
+ "step": 2982
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014333490602791608,
+ "loss": 0.4677,
+ "step": 2983
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014330087306580887,
+ "loss": 0.6505,
+ "step": 2984
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014326683392994167,
+ "loss": 0.4451,
+ "step": 2985
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014323278862516775,
+ "loss": 0.4025,
+ "step": 2986
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.0001431987371563412,
+ "loss": 0.5084,
+ "step": 2987
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.000143164679528317,
+ "loss": 0.4806,
+ "step": 2988
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014313061574595115,
+ "loss": 0.3954,
+ "step": 2989
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014309654581410024,
+ "loss": 0.4339,
+ "step": 2990
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.000143062469737622,
+ "loss": 0.6739,
+ "step": 2991
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014302838752137487,
+ "loss": 0.6414,
+ "step": 2992
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014299429917021827,
+ "loss": 0.5075,
+ "step": 2993
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014296020468901246,
+ "loss": 0.4105,
+ "step": 2994
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014292610408261856,
+ "loss": 0.7371,
+ "step": 2995
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014289199735589852,
+ "loss": 0.7485,
+ "step": 2996
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014285788451371534,
+ "loss": 0.7629,
+ "step": 2997
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014282376556093264,
+ "loss": 0.3849,
+ "step": 2998
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014278964050241512,
+ "loss": 0.5355,
+ "step": 2999
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014275550934302823,
+ "loss": 0.4077,
+ "step": 3000
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014272137208763832,
+ "loss": 0.5352,
+ "step": 3001
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014268722874111265,
+ "loss": 0.5257,
+ "step": 3002
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014265307930831932,
+ "loss": 0.4265,
+ "step": 3003
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014261892379412728,
+ "loss": 0.5776,
+ "step": 3004
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.0001425847622034063,
+ "loss": 0.3521,
+ "step": 3005
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014255059454102722,
+ "loss": 0.6203,
+ "step": 3006
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014251642081186146,
+ "loss": 0.5238,
+ "step": 3007
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014248224102078152,
+ "loss": 0.3887,
+ "step": 3008
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014244805517266067,
+ "loss": 0.5001,
+ "step": 3009
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001424138632723731,
+ "loss": 0.555,
+ "step": 3010
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001423796653247938,
+ "loss": 0.6137,
+ "step": 3011
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014234546133479867,
+ "loss": 0.8052,
+ "step": 3012
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001423112513072644,
+ "loss": 0.5392,
+ "step": 3013
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014227703524706867,
+ "loss": 0.5067,
+ "step": 3014
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.0001422428131590899,
+ "loss": 0.4016,
+ "step": 3015
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014220858504820742,
+ "loss": 0.4165,
+ "step": 3016
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014217435091930141,
+ "loss": 0.7395,
+ "step": 3017
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014214011077725292,
+ "loss": 0.4985,
+ "step": 3018
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014210586462694384,
+ "loss": 0.4821,
+ "step": 3019
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014207161247325691,
+ "loss": 0.6046,
+ "step": 3020
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014203735432107576,
+ "loss": 0.568,
+ "step": 3021
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014200309017528486,
+ "loss": 0.7383,
+ "step": 3022
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.0001419688200407695,
+ "loss": 0.5296,
+ "step": 3023
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014193454392241592,
+ "loss": 0.6391,
+ "step": 3024
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014190026182511102,
+ "loss": 0.4523,
+ "step": 3025
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.0001418659737537428,
+ "loss": 0.482,
+ "step": 3026
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014183167971319998,
+ "loss": 0.4519,
+ "step": 3027
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014179737970837207,
+ "loss": 0.4156,
+ "step": 3028
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014176307374414956,
+ "loss": 0.5142,
+ "step": 3029
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014172876182542372,
+ "loss": 0.4068,
+ "step": 3030
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014169444395708666,
+ "loss": 0.5908,
+ "step": 3031
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.0001416601201440314,
+ "loss": 0.511,
+ "step": 3032
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014162579039115174,
+ "loss": 0.5165,
+ "step": 3033
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014159145470334235,
+ "loss": 0.4449,
+ "step": 3034
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014155711308549878,
+ "loss": 0.4808,
+ "step": 3035
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014152276554251736,
+ "loss": 0.5365,
+ "step": 3036
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014148841207929527,
+ "loss": 0.6016,
+ "step": 3037
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.0001414540527007307,
+ "loss": 0.379,
+ "step": 3038
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014141968741172238,
+ "loss": 0.6687,
+ "step": 3039
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014138531621717018,
+ "loss": 0.6219,
+ "step": 3040
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001413509391219746,
+ "loss": 0.3408,
+ "step": 3041
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014131655613103708,
+ "loss": 0.5148,
+ "step": 3042
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001412821672492599,
+ "loss": 0.3811,
+ "step": 3043
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001412477724815462,
+ "loss": 0.4691,
+ "step": 3044
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014121337183279988,
+ "loss": 0.6919,
+ "step": 3045
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001411789653079257,
+ "loss": 0.5804,
+ "step": 3046
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014114455291182933,
+ "loss": 0.418,
+ "step": 3047
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001411101346494172,
+ "loss": 0.4422,
+ "step": 3048
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410757105255966,
+ "loss": 0.389,
+ "step": 3049
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410412805452757,
+ "loss": 0.4083,
+ "step": 3050
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410068447133634,
+ "loss": 0.8703,
+ "step": 3051
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014097240303476954,
+ "loss": 0.4724,
+ "step": 3052
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014093795551440474,
+ "loss": 0.6257,
+ "step": 3053
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014090350215718048,
+ "loss": 0.5212,
+ "step": 3054
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014086904296800902,
+ "loss": 0.4429,
+ "step": 3055
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014083457795180355,
+ "loss": 0.3496,
+ "step": 3056
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014080010711347798,
+ "loss": 0.3402,
+ "step": 3057
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.0001407656304579471,
+ "loss": 0.4783,
+ "step": 3058
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014073114799012653,
+ "loss": 0.3987,
+ "step": 3059
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014069665971493274,
+ "loss": 0.4755,
+ "step": 3060
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014066216563728303,
+ "loss": 0.4792,
+ "step": 3061
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014062766576209546,
+ "loss": 0.4275,
+ "step": 3062
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014059316009428893,
+ "loss": 0.3598,
+ "step": 3063
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014055864863878325,
+ "loss": 0.4887,
+ "step": 3064
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.000140524131400499,
+ "loss": 0.5421,
+ "step": 3065
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014048960838435753,
+ "loss": 0.352,
+ "step": 3066
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014045507959528118,
+ "loss": 0.3124,
+ "step": 3067
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014042054503819287,
+ "loss": 0.3955,
+ "step": 3068
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014038600471801658,
+ "loss": 0.455,
+ "step": 3069
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014035145863967692,
+ "loss": 0.5177,
+ "step": 3070
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014031690680809945,
+ "loss": 0.4205,
+ "step": 3071
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014028234922821054,
+ "loss": 0.4832,
+ "step": 3072
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001402477859049373,
+ "loss": 0.3496,
+ "step": 3073
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001402132168432077,
+ "loss": 0.5404,
+ "step": 3074
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014017864204795058,
+ "loss": 0.5106,
+ "step": 3075
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001401440615240955,
+ "loss": 0.6611,
+ "step": 3076
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014010947527657295,
+ "loss": 0.3879,
+ "step": 3077
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001400748833103141,
+ "loss": 0.3054,
+ "step": 3078
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014004028563025108,
+ "loss": 0.3461,
+ "step": 3079
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001400056822413167,
+ "loss": 0.482,
+ "step": 3080
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.0001399710731484447,
+ "loss": 0.3285,
+ "step": 3081
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013993645835656953,
+ "loss": 0.363,
+ "step": 3082
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013990183787062661,
+ "loss": 0.5092,
+ "step": 3083
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013986721169555194,
+ "loss": 0.3009,
+ "step": 3084
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013983257983628253,
+ "loss": 0.3831,
+ "step": 3085
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.0001397979422977561,
+ "loss": 0.3718,
+ "step": 3086
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013976329908491118,
+ "loss": 0.3401,
+ "step": 3087
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013972865020268722,
+ "loss": 0.5294,
+ "step": 3088
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013969399565602435,
+ "loss": 0.5054,
+ "step": 3089
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001396593354498635,
+ "loss": 0.4247,
+ "step": 3090
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013962466958914658,
+ "loss": 0.431,
+ "step": 3091
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013958999807881604,
+ "loss": 0.6341,
+ "step": 3092
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001395553209238154,
+ "loss": 0.5126,
+ "step": 3093
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013952063812908881,
+ "loss": 0.3775,
+ "step": 3094
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001394859496995813,
+ "loss": 0.5149,
+ "step": 3095
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013945125564023868,
+ "loss": 0.2879,
+ "step": 3096
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013941655595600756,
+ "loss": 0.5621,
+ "step": 3097
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013938185065183532,
+ "loss": 0.408,
+ "step": 3098
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013934713973267024,
+ "loss": 0.4247,
+ "step": 3099
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001393124232034613,
+ "loss": 0.4224,
+ "step": 3100
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001392777010691584,
+ "loss": 0.4142,
+ "step": 3101
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013924297333471204,
+ "loss": 0.6004,
+ "step": 3102
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013920824000507374,
+ "loss": 0.6016,
+ "step": 3103
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001391735010851956,
+ "loss": 0.4669,
+ "step": 3104
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013913875658003074,
+ "loss": 0.3987,
+ "step": 3105
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001391040064945329,
+ "loss": 0.471,
+ "step": 3106
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001390692508336568,
+ "loss": 0.6135,
+ "step": 3107
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013903448960235766,
+ "loss": 0.5369,
+ "step": 3108
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013899972280559183,
+ "loss": 0.3295,
+ "step": 3109
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001389649504483162,
+ "loss": 0.309,
+ "step": 3110
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013893017253548858,
+ "loss": 0.4026,
+ "step": 3111
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013889538907206755,
+ "loss": 0.4724,
+ "step": 3112
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001388606000630125,
+ "loss": 0.3606,
+ "step": 3113
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001388258055132835,
+ "loss": 0.4894,
+ "step": 3114
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001387910054278416,
+ "loss": 0.4832,
+ "step": 3115
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001387561998116484,
+ "loss": 0.4604,
+ "step": 3116
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013872138866966656,
+ "loss": 0.4377,
+ "step": 3117
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013868657200685934,
+ "loss": 0.3965,
+ "step": 3118
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001386517498281908,
+ "loss": 0.7653,
+ "step": 3119
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013861692213862584,
+ "loss": 0.5213,
+ "step": 3120
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013858208894313017,
+ "loss": 0.9296,
+ "step": 3121
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013854725024667016,
+ "loss": 0.7738,
+ "step": 3122
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013851240605421315,
+ "loss": 0.5826,
+ "step": 3123
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.0001384775563707271,
+ "loss": 0.5502,
+ "step": 3124
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013844270120118085,
+ "loss": 0.3535,
+ "step": 3125
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.0001384078405505439,
+ "loss": 0.4853,
+ "step": 3126
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013837297442378675,
+ "loss": 0.5819,
+ "step": 3127
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013833810282588044,
+ "loss": 0.3728,
+ "step": 3128
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013830322576179697,
+ "loss": 0.3327,
+ "step": 3129
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.000138268343236509,
+ "loss": 0.4618,
+ "step": 3130
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013823345525499004,
+ "loss": 0.3377,
+ "step": 3131
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013819856182221434,
+ "loss": 0.3154,
+ "step": 3132
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013816366294315695,
+ "loss": 0.5116,
+ "step": 3133
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.0001381287586227937,
+ "loss": 0.4987,
+ "step": 3134
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013809384886610118,
+ "loss": 0.5596,
+ "step": 3135
+ },
+ {
+ "epoch": 3.72,
+ "eval_loss": 2.939779281616211,
+ "eval_runtime": 283.9953,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 3135
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013805893367805678,
+ "loss": 0.5128,
+ "step": 3136
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.0001380240130636386,
+ "loss": 0.3149,
+ "step": 3137
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013798908702782558,
+ "loss": 0.4984,
+ "step": 3138
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.0001379541555755974,
+ "loss": 0.626,
+ "step": 3139
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013791921871193457,
+ "loss": 0.4949,
+ "step": 3140
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013788427644181823,
+ "loss": 0.5654,
+ "step": 3141
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.0001378493287702305,
+ "loss": 0.4197,
+ "step": 3142
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013781437570215406,
+ "loss": 0.4341,
+ "step": 3143
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013777941724257253,
+ "loss": 0.3576,
+ "step": 3144
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013774445339647014,
+ "loss": 0.3098,
+ "step": 3145
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013770948416883205,
+ "loss": 0.6052,
+ "step": 3146
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013767450956464407,
+ "loss": 0.4327,
+ "step": 3147
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013763952958889287,
+ "loss": 0.4717,
+ "step": 3148
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.0001376045442465657,
+ "loss": 0.5263,
+ "step": 3149
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013756955354265085,
+ "loss": 0.5021,
+ "step": 3150
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013753455748213714,
+ "loss": 0.4066,
+ "step": 3151
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013749955607001433,
+ "loss": 0.3461,
+ "step": 3152
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013746454931127278,
+ "loss": 0.4318,
+ "step": 3153
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013742953721090372,
+ "loss": 0.4195,
+ "step": 3154
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.0001373945197738991,
+ "loss": 0.3862,
+ "step": 3155
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013735949700525163,
+ "loss": 0.5916,
+ "step": 3156
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013732446890995484,
+ "loss": 0.5336,
+ "step": 3157
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013728943549300295,
+ "loss": 0.4104,
+ "step": 3158
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013725439675939095,
+ "loss": 0.541,
+ "step": 3159
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013721935271411464,
+ "loss": 0.5173,
+ "step": 3160
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013718430336217045,
+ "loss": 0.3866,
+ "step": 3161
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013714924870855571,
+ "loss": 0.6113,
+ "step": 3162
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013711418875826846,
+ "loss": 0.5817,
+ "step": 3163
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001370791235163075,
+ "loss": 0.5331,
+ "step": 3164
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013704405298767229,
+ "loss": 0.5744,
+ "step": 3165
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001370089771773632,
+ "loss": 0.494,
+ "step": 3166
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013697389609038124,
+ "loss": 0.4537,
+ "step": 3167
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013693880973172822,
+ "loss": 0.5494,
+ "step": 3168
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013690371810640665,
+ "loss": 0.537,
+ "step": 3169
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001368686212194199,
+ "loss": 0.4698,
+ "step": 3170
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013683351907577194,
+ "loss": 0.5254,
+ "step": 3171
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013679841168046767,
+ "loss": 0.3857,
+ "step": 3172
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013676329903851254,
+ "loss": 0.4464,
+ "step": 3173
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.0001367281811549129,
+ "loss": 0.5651,
+ "step": 3174
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.0001366930580346758,
+ "loss": 0.4192,
+ "step": 3175
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.000136657929682809,
+ "loss": 0.3364,
+ "step": 3176
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013662279610432104,
+ "loss": 0.3539,
+ "step": 3177
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013658765730422125,
+ "loss": 0.6074,
+ "step": 3178
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013655251328751957,
+ "loss": 0.5322,
+ "step": 3179
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013651736405922686,
+ "loss": 0.4176,
+ "step": 3180
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013648220962435458,
+ "loss": 0.4878,
+ "step": 3181
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.000136447049987915,
+ "loss": 0.6351,
+ "step": 3182
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013641188515492109,
+ "loss": 0.4487,
+ "step": 3183
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001363767151303866,
+ "loss": 0.4451,
+ "step": 3184
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013634153991932607,
+ "loss": 0.4944,
+ "step": 3185
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001363063595267547,
+ "loss": 0.5932,
+ "step": 3186
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013627117395768833,
+ "loss": 0.4964,
+ "step": 3187
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001362359832171438,
+ "loss": 0.6795,
+ "step": 3188
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013620078731013845,
+ "loss": 0.3862,
+ "step": 3189
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001361655862416905,
+ "loss": 0.3425,
+ "step": 3190
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001361303800168188,
+ "loss": 0.4361,
+ "step": 3191
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001360951686405431,
+ "loss": 0.5774,
+ "step": 3192
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013605995211788365,
+ "loss": 0.4044,
+ "step": 3193
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013602473045386165,
+ "loss": 0.3858,
+ "step": 3194
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013598950365349883,
+ "loss": 0.6136,
+ "step": 3195
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013595427172181785,
+ "loss": 0.329,
+ "step": 3196
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013591903466384203,
+ "loss": 0.3898,
+ "step": 3197
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013588379248459536,
+ "loss": 0.4809,
+ "step": 3198
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013584854518910262,
+ "loss": 0.4108,
+ "step": 3199
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013581329278238927,
+ "loss": 0.4655,
+ "step": 3200
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013577803526948162,
+ "loss": 0.4657,
+ "step": 3201
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013574277265540654,
+ "loss": 0.4842,
+ "step": 3202
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013570750494519175,
+ "loss": 0.4593,
+ "step": 3203
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013567223214386564,
+ "loss": 0.435,
+ "step": 3204
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013563695425645737,
+ "loss": 0.7146,
+ "step": 3205
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013560167128799674,
+ "loss": 0.5027,
+ "step": 3206
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013556638324351442,
+ "loss": 0.4844,
+ "step": 3207
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013553109012804163,
+ "loss": 0.7605,
+ "step": 3208
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013549579194661044,
+ "loss": 0.396,
+ "step": 3209
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013546048870425356,
+ "loss": 0.5178,
+ "step": 3210
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013542518040600453,
+ "loss": 0.6946,
+ "step": 3211
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.0001353898670568975,
+ "loss": 0.5054,
+ "step": 3212
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013535454866196739,
+ "loss": 0.4495,
+ "step": 3213
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013531922522624982,
+ "loss": 0.5138,
+ "step": 3214
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001352838967547812,
+ "loss": 0.4706,
+ "step": 3215
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013524856325259848,
+ "loss": 0.5193,
+ "step": 3216
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001352132247247396,
+ "loss": 0.4436,
+ "step": 3217
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013517788117624292,
+ "loss": 0.4139,
+ "step": 3218
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001351425326121478,
+ "loss": 0.5937,
+ "step": 3219
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.000135107179037494,
+ "loss": 0.3375,
+ "step": 3220
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013507182045732234,
+ "loss": 0.3712,
+ "step": 3221
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013503645687667408,
+ "loss": 0.3424,
+ "step": 3222
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013500108830059133,
+ "loss": 0.3333,
+ "step": 3223
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013496571473411688,
+ "loss": 0.4042,
+ "step": 3224
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013493033618229417,
+ "loss": 0.4963,
+ "step": 3225
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.0001348949526501675,
+ "loss": 0.3946,
+ "step": 3226
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013485956414278178,
+ "loss": 0.5807,
+ "step": 3227
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013482417066518256,
+ "loss": 0.4561,
+ "step": 3228
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013478877222241627,
+ "loss": 0.4964,
+ "step": 3229
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013475336881952986,
+ "loss": 0.6429,
+ "step": 3230
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013471796046157116,
+ "loss": 0.5466,
+ "step": 3231
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013468254715358861,
+ "loss": 0.3882,
+ "step": 3232
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013464712890063138,
+ "loss": 0.5006,
+ "step": 3233
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.0001346117057077493,
+ "loss": 0.494,
+ "step": 3234
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013457627757999303,
+ "loss": 0.5444,
+ "step": 3235
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013454084452241372,
+ "loss": 0.3714,
+ "step": 3236
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013450540654006348,
+ "loss": 0.3335,
+ "step": 3237
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.0001344699636379949,
+ "loss": 0.4771,
+ "step": 3238
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013443451582126144,
+ "loss": 0.466,
+ "step": 3239
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013439906309491712,
+ "loss": 0.5537,
+ "step": 3240
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013436360546401676,
+ "loss": 0.5899,
+ "step": 3241
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013432814293361584,
+ "loss": 0.443,
+ "step": 3242
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013429267550877055,
+ "loss": 0.4238,
+ "step": 3243
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013425720319453773,
+ "loss": 0.6529,
+ "step": 3244
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013422172599597505,
+ "loss": 0.6163,
+ "step": 3245
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013418624391814068,
+ "loss": 0.5183,
+ "step": 3246
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013415075696609363,
+ "loss": 0.7659,
+ "step": 3247
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001341152651448936,
+ "loss": 0.3717,
+ "step": 3248
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340797684596009,
+ "loss": 0.6885,
+ "step": 3249
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340442669152766,
+ "loss": 0.4483,
+ "step": 3250
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340087605169825,
+ "loss": 0.3417,
+ "step": 3251
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013397324926978094,
+ "loss": 0.4751,
+ "step": 3252
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013393773317873508,
+ "loss": 0.4448,
+ "step": 3253
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013390221224890878,
+ "loss": 0.6278,
+ "step": 3254
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013386668648536655,
+ "loss": 0.2995,
+ "step": 3255
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013383115589317353,
+ "loss": 0.535,
+ "step": 3256
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013379562047739568,
+ "loss": 0.4972,
+ "step": 3257
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013376008024309948,
+ "loss": 0.4821,
+ "step": 3258
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.0001337245351953523,
+ "loss": 0.392,
+ "step": 3259
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.000133688985339222,
+ "loss": 0.413,
+ "step": 3260
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013365343067977726,
+ "loss": 0.4689,
+ "step": 3261
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013361787122208744,
+ "loss": 0.4737,
+ "step": 3262
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013358230697122246,
+ "loss": 0.5033,
+ "step": 3263
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013354673793225302,
+ "loss": 0.4901,
+ "step": 3264
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013351116411025054,
+ "loss": 0.5776,
+ "step": 3265
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013347558551028702,
+ "loss": 0.5005,
+ "step": 3266
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013344000213743522,
+ "loss": 0.6475,
+ "step": 3267
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013340441399676856,
+ "loss": 0.4394,
+ "step": 3268
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001333688210933611,
+ "loss": 0.4351,
+ "step": 3269
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001333332234322876,
+ "loss": 0.4526,
+ "step": 3270
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001332976210186236,
+ "loss": 0.3006,
+ "step": 3271
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013326201385744518,
+ "loss": 0.382,
+ "step": 3272
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013322640195382907,
+ "loss": 0.3488,
+ "step": 3273
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013319078531285285,
+ "loss": 0.5538,
+ "step": 3274
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013315516393959463,
+ "loss": 0.5328,
+ "step": 3275
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013311953783913324,
+ "loss": 0.5216,
+ "step": 3276
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001330839070165482,
+ "loss": 0.3845,
+ "step": 3277
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001330482714769197,
+ "loss": 0.5293,
+ "step": 3278
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013301263122532855,
+ "loss": 0.5415,
+ "step": 3279
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001329769862668563,
+ "loss": 0.5309,
+ "step": 3280
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013294133660658516,
+ "loss": 0.4629,
+ "step": 3281
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013290568224959794,
+ "loss": 0.4329,
+ "step": 3282
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013287002320097821,
+ "loss": 0.3973,
+ "step": 3283
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.0001328343594658102,
+ "loss": 0.3417,
+ "step": 3284
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013279869104917873,
+ "loss": 0.4784,
+ "step": 3285
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013276301795616936,
+ "loss": 0.3668,
+ "step": 3286
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.0001327273401918683,
+ "loss": 0.3726,
+ "step": 3287
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013269165776136238,
+ "loss": 0.518,
+ "step": 3288
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013265597066973922,
+ "loss": 0.3864,
+ "step": 3289
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013262027892208694,
+ "loss": 0.4249,
+ "step": 3290
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013258458252349444,
+ "loss": 0.395,
+ "step": 3291
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013254888147905126,
+ "loss": 0.8359,
+ "step": 3292
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013251317579384756,
+ "loss": 0.5028,
+ "step": 3293
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.0001324774654729742,
+ "loss": 0.4216,
+ "step": 3294
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.0001324417505215227,
+ "loss": 0.6145,
+ "step": 3295
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013240603094458522,
+ "loss": 0.6158,
+ "step": 3296
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013237030674725464,
+ "loss": 0.5101,
+ "step": 3297
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001323345779346244,
+ "loss": 0.6933,
+ "step": 3298
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001322988445117886,
+ "loss": 0.4192,
+ "step": 3299
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001322631064838422,
+ "loss": 0.4549,
+ "step": 3300
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013222736385588054,
+ "loss": 0.4947,
+ "step": 3301
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013219161663299982,
+ "loss": 0.5383,
+ "step": 3302
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013215586482029669,
+ "loss": 0.4919,
+ "step": 3303
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001321201084228687,
+ "loss": 0.4603,
+ "step": 3304
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013208434744581385,
+ "loss": 0.3127,
+ "step": 3305
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013204858189423097,
+ "loss": 0.754,
+ "step": 3306
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013201281177321935,
+ "loss": 0.3746,
+ "step": 3307
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013197703708787913,
+ "loss": 0.5576,
+ "step": 3308
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001319412578433109,
+ "loss": 0.4992,
+ "step": 3309
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013190547404461598,
+ "loss": 0.4533,
+ "step": 3310
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001318696856968965,
+ "loss": 0.4155,
+ "step": 3311
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013183389280525497,
+ "loss": 0.3661,
+ "step": 3312
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013179809537479476,
+ "loss": 0.4512,
+ "step": 3313
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013176229341061975,
+ "loss": 0.5895,
+ "step": 3314
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013172648691783454,
+ "loss": 0.3308,
+ "step": 3315
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013169067590154432,
+ "loss": 0.4128,
+ "step": 3316
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013165486036685503,
+ "loss": 0.5432,
+ "step": 3317
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001316190403188731,
+ "loss": 0.4297,
+ "step": 3318
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013158321576270575,
+ "loss": 0.4259,
+ "step": 3319
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001315473867034608,
+ "loss": 0.4428,
+ "step": 3320
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001315115531462466,
+ "loss": 0.6495,
+ "step": 3321
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013147571509617228,
+ "loss": 0.5706,
+ "step": 3322
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001314398725583476,
+ "loss": 0.3647,
+ "step": 3323
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001314040255378829,
+ "loss": 0.4864,
+ "step": 3324
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013136817403988917,
+ "loss": 0.4197,
+ "step": 3325
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013133231806947805,
+ "loss": 0.4818,
+ "step": 3326
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013129645763176184,
+ "loss": 0.4201,
+ "step": 3327
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001312605927318534,
+ "loss": 0.4352,
+ "step": 3328
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001312247233748664,
+ "loss": 0.2785,
+ "step": 3329
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001311888495659149,
+ "loss": 0.4424,
+ "step": 3330
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013115297131011382,
+ "loss": 0.4258,
+ "step": 3331
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013111708861257855,
+ "loss": 0.4332,
+ "step": 3332
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 8330,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 833,
+ "total_flos": 1.1678909660399665e+19,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-3332/trainer_state.json:com.dropbox.attrs b/checkpoint-3332/trainer_state.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..1ee9dbae7ee56dfa3b8e67f40cbf5c91984cdd5f
Binary files /dev/null and b/checkpoint-3332/trainer_state.json:com.dropbox.attrs differ
diff --git a/checkpoint-3332/training_args.bin b/checkpoint-3332/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207
--- /dev/null
+++ b/checkpoint-3332/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869
+size 4859
diff --git a/checkpoint-3332/training_args.bin:com.dropbox.attrs b/checkpoint-3332/training_args.bin:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..ffe32c070622a02ccf62b565de31130fb16609ce
Binary files /dev/null and b/checkpoint-3332/training_args.bin:com.dropbox.attrs differ
diff --git a/checkpoint-4165/README.md b/checkpoint-4165/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda
--- /dev/null
+++ b/checkpoint-4165/README.md
@@ -0,0 +1,218 @@
+---
+library_name: peft
+base_model: mistralai/Mixtral-8x7B-v0.1
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+## Training procedure
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+### Framework versions
+
+- PEFT 0.7.0
\ No newline at end of file
diff --git a/checkpoint-4165/README.md:com.dropbox.attrs b/checkpoint-4165/README.md:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..742d48d310ed788c62650b5e2b05a5984862117e
Binary files /dev/null and b/checkpoint-4165/README.md:com.dropbox.attrs differ
diff --git a/checkpoint-4165/adapter_config.json b/checkpoint-4165/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a
--- /dev/null
+++ b/checkpoint-4165/adapter_config.json
@@ -0,0 +1,32 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "k_proj",
+ "w1",
+ "gate",
+ "w2",
+ "q_proj",
+ "w3",
+ "o_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-4165/adapter_config.json:com.dropbox.attrs b/checkpoint-4165/adapter_config.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..73d0543343fcc90a29326359185ac5c839148b65
Binary files /dev/null and b/checkpoint-4165/adapter_config.json:com.dropbox.attrs differ
diff --git a/checkpoint-4165/adapter_model.safetensors b/checkpoint-4165/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a20e4ab9a1878d232b5cae6f89086006cacf2ada
--- /dev/null
+++ b/checkpoint-4165/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75e8409fd75d6737abc14ae7d214c4b6235759b1472d5fc69a6e2bf87d2150df
+size 3875879784
diff --git a/checkpoint-4165/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-4165/adapter_model.safetensors:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..d52bcc89b53d9461c74fef2786aa8233ab654fd2
Binary files /dev/null and b/checkpoint-4165/adapter_model.safetensors:com.dropbox.attrs differ
diff --git a/checkpoint-4165/optimizer.pt b/checkpoint-4165/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2142de25ad7fbabff81907d1943147a525be5df
--- /dev/null
+++ b/checkpoint-4165/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:392e12045d33cf98614290b947d1350cb61f0283f17168b7aa9a5591c8866410
+size 1943844127
diff --git a/checkpoint-4165/optimizer.pt:com.dropbox.attrs b/checkpoint-4165/optimizer.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..86300b00d29a9d85f88c14ca896987cc6f09786a
Binary files /dev/null and b/checkpoint-4165/optimizer.pt:com.dropbox.attrs differ
diff --git a/checkpoint-4165/rng_state.pth b/checkpoint-4165/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..73f93be7fad32b80faea3d812a3f621b43e8af81
--- /dev/null
+++ b/checkpoint-4165/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11a5fc4dc6499a7d54d02a029e272d15defccd92d64b2103b2defa9358c5cb2c
+size 14575
diff --git a/checkpoint-4165/rng_state.pth:com.dropbox.attrs b/checkpoint-4165/rng_state.pth:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..efdb558e4d125445320b019bca01d63dd63e75e3
Binary files /dev/null and b/checkpoint-4165/rng_state.pth:com.dropbox.attrs differ
diff --git a/checkpoint-4165/scheduler.pt b/checkpoint-4165/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec0ce657afa6e8e81abadf06cb9561e11b4083f8
--- /dev/null
+++ b/checkpoint-4165/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:844122fb3255d7552c2d49de519b6d737b8391693908ec71f80d185bffa3d134
+size 627
diff --git a/checkpoint-4165/scheduler.pt:com.dropbox.attrs b/checkpoint-4165/scheduler.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..ee72b37d76f9be9cd7760c13d4fd58d9f266e3b8
Binary files /dev/null and b/checkpoint-4165/scheduler.pt:com.dropbox.attrs differ
diff --git a/checkpoint-4165/trainer_state.json b/checkpoint-4165/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..714a59af08793c20f90a58478160d2a5c4c7f7f6
--- /dev/null
+++ b/checkpoint-4165/trainer_state.json
@@ -0,0 +1,25171 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 4.942376950780313,
+ "eval_steps": 209,
+ "global_step": 4165,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": 2e-05,
+ "loss": 2.1426,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "eval_loss": 2.071432113647461,
+ "eval_runtime": 279.6718,
+ "eval_samples_per_second": 0.737,
+ "eval_steps_per_second": 0.737,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4e-05,
+ "loss": 2.4033,
+ "step": 2
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 6e-05,
+ "loss": 2.1893,
+ "step": 3
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 8e-05,
+ "loss": 2.3226,
+ "step": 4
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0001,
+ "loss": 2.2485,
+ "step": 5
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00012,
+ "loss": 1.9704,
+ "step": 6
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00014,
+ "loss": 1.6929,
+ "step": 7
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00016,
+ "loss": 2.2957,
+ "step": 8
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00018,
+ "loss": 1.9907,
+ "step": 9
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0002,
+ "loss": 2.1295,
+ "step": 10
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999999287109068,
+ "loss": 2.2249,
+ "step": 11
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999997148436365,
+ "loss": 2.1733,
+ "step": 12
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.000199999935839822,
+ "loss": 2.1404,
+ "step": 13
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999988593747084,
+ "loss": 2.0236,
+ "step": 14
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999982177731722,
+ "loss": 1.9639,
+ "step": 15
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999974335937034,
+ "loss": 1.692,
+ "step": 16
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999965068364137,
+ "loss": 2.3609,
+ "step": 17
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999954375014348,
+ "loss": 2.3553,
+ "step": 18
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999942255889198,
+ "loss": 1.5733,
+ "step": 19
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999928710990412,
+ "loss": 1.7505,
+ "step": 20
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999913740319922,
+ "loss": 2.3068,
+ "step": 21
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999897343879862,
+ "loss": 1.8371,
+ "step": 22
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.0001999987952167257,
+ "loss": 1.9852,
+ "step": 23
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999860273700585,
+ "loss": 1.9625,
+ "step": 24
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999839599966655,
+ "loss": 2.1089,
+ "step": 25
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999817500473724,
+ "loss": 2.1086,
+ "step": 26
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999793975224945,
+ "loss": 2.0284,
+ "step": 27
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999769024223673,
+ "loss": 2.3641,
+ "step": 28
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999742647473464,
+ "loss": 1.963,
+ "step": 29
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999714844978078,
+ "loss": 2.0635,
+ "step": 30
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999968561674148,
+ "loss": 1.9304,
+ "step": 31
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999654962767839,
+ "loss": 1.4124,
+ "step": 32
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999622883061518,
+ "loss": 2.1444,
+ "step": 33
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999589377627102,
+ "loss": 1.6477,
+ "step": 34
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999955444646936,
+ "loss": 2.2601,
+ "step": 35
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999518089593282,
+ "loss": 1.6256,
+ "step": 36
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999948030700404,
+ "loss": 1.9155,
+ "step": 37
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999441098707025,
+ "loss": 2.1408,
+ "step": 38
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999400464707832,
+ "loss": 2.104,
+ "step": 39
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.0001999935840501225,
+ "loss": 1.9841,
+ "step": 40
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999314919626272,
+ "loss": 1.5924,
+ "step": 41
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999270008556108,
+ "loss": 1.9956,
+ "step": 42
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999223671808154,
+ "loss": 1.4673,
+ "step": 43
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999175909389018,
+ "loss": 2.1595,
+ "step": 44
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999126721305513,
+ "loss": 1.8439,
+ "step": 45
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999076107564648,
+ "loss": 1.9961,
+ "step": 46
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999024068173638,
+ "loss": 2.1504,
+ "step": 47
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998970603139912,
+ "loss": 2.2907,
+ "step": 48
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999891571247108,
+ "loss": 1.5709,
+ "step": 49
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999885939617498,
+ "loss": 2.4504,
+ "step": 50
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998801654259632,
+ "loss": 2.3787,
+ "step": 51
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999874248673328,
+ "loss": 2.0434,
+ "step": 52
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998681893604347,
+ "loss": 2.1671,
+ "step": 53
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999861987488148,
+ "loss": 1.7432,
+ "step": 54
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998556430573521,
+ "loss": 1.7737,
+ "step": 55
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998491560689513,
+ "loss": 2.0122,
+ "step": 56
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999842526523871,
+ "loss": 1.7545,
+ "step": 57
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998357544230558,
+ "loss": 2.201,
+ "step": 58
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998288397674716,
+ "loss": 2.0396,
+ "step": 59
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999821782558104,
+ "loss": 1.9275,
+ "step": 60
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998145827959598,
+ "loss": 1.7797,
+ "step": 61
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999807240482065,
+ "loss": 2.1463,
+ "step": 62
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997997556174665,
+ "loss": 1.935,
+ "step": 63
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999792128203232,
+ "loss": 2.1182,
+ "step": 64
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999784358240448,
+ "loss": 2.2297,
+ "step": 65
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997764457302234,
+ "loss": 2.1052,
+ "step": 66
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999768390673686,
+ "loss": 2.0777,
+ "step": 67
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997601930719835,
+ "loss": 2.1419,
+ "step": 68
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999751852926286,
+ "loss": 2.2586,
+ "step": 69
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997433702377817,
+ "loss": 1.9089,
+ "step": 70
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997347450076801,
+ "loss": 2.0587,
+ "step": 71
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997259772372116,
+ "loss": 2.4143,
+ "step": 72
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997170669276256,
+ "loss": 1.947,
+ "step": 73
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997080140801932,
+ "loss": 2.008,
+ "step": 74
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996988186962041,
+ "loss": 2.4912,
+ "step": 75
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996894807769707,
+ "loss": 2.0279,
+ "step": 76
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996800003238232,
+ "loss": 1.9914,
+ "step": 77
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.0001999670377338114,
+ "loss": 1.9091,
+ "step": 78
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996606118212148,
+ "loss": 1.8038,
+ "step": 79
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996507037745183,
+ "loss": 2.3573,
+ "step": 80
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996406531994364,
+ "loss": 2.3204,
+ "step": 81
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999630460097403,
+ "loss": 2.1619,
+ "step": 82
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999620124469871,
+ "loss": 1.9977,
+ "step": 83
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996096463183142,
+ "loss": 2.195,
+ "step": 84
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995990256442263,
+ "loss": 1.9909,
+ "step": 85
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995882624491217,
+ "loss": 2.2001,
+ "step": 86
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995773567345354,
+ "loss": 1.5795,
+ "step": 87
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995663085020212,
+ "loss": 2.174,
+ "step": 88
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995551177531557,
+ "loss": 1.9605,
+ "step": 89
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995437844895334,
+ "loss": 2.1768,
+ "step": 90
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999532308712771,
+ "loss": 1.6906,
+ "step": 91
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995206904245037,
+ "loss": 2.1029,
+ "step": 92
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995089296263893,
+ "loss": 2.0652,
+ "step": 93
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019994970263201035,
+ "loss": 2.1733,
+ "step": 94
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999484980507344,
+ "loss": 1.9413,
+ "step": 95
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999472792189828,
+ "loss": 1.9538,
+ "step": 96
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994604613692935,
+ "loss": 2.4158,
+ "step": 97
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994479880474988,
+ "loss": 1.8964,
+ "step": 98
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999435372226222,
+ "loss": 2.3135,
+ "step": 99
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999422613907262,
+ "loss": 2.127,
+ "step": 100
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994097130924374,
+ "loss": 1.9954,
+ "step": 101
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993966697835883,
+ "loss": 2.1363,
+ "step": 102
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993834839825738,
+ "loss": 1.7779,
+ "step": 103
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993701556912742,
+ "loss": 2.0923,
+ "step": 104
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993566849115898,
+ "loss": 1.9183,
+ "step": 105
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993430716454413,
+ "loss": 1.7894,
+ "step": 106
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993293158947694,
+ "loss": 2.0094,
+ "step": 107
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999315417661536,
+ "loss": 2.1469,
+ "step": 108
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999301376947722,
+ "loss": 1.6924,
+ "step": 109
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999287193755329,
+ "loss": 2.1794,
+ "step": 110
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.000199927286808638,
+ "loss": 2.1338,
+ "step": 111
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019992583999429178,
+ "loss": 1.9988,
+ "step": 112
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999243789327004,
+ "loss": 2.0735,
+ "step": 113
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999229036240723,
+ "loss": 2.0521,
+ "step": 114
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019992141406861776,
+ "loss": 1.9441,
+ "step": 115
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991991026654918,
+ "loss": 2.1244,
+ "step": 116
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999183922180809,
+ "loss": 1.7937,
+ "step": 117
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999168599234295,
+ "loss": 2.2603,
+ "step": 118
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991531338281332,
+ "loss": 2.1846,
+ "step": 119
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991375259645293,
+ "loss": 2.3241,
+ "step": 120
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991217756457085,
+ "loss": 2.0926,
+ "step": 121
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991058828739165,
+ "loss": 2.0092,
+ "step": 122
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990898476514193,
+ "loss": 1.8076,
+ "step": 123
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990736699805029,
+ "loss": 2.0369,
+ "step": 124
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990573498634742,
+ "loss": 2.0488,
+ "step": 125
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.000199904088730266,
+ "loss": 2.1534,
+ "step": 126
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990242823004074,
+ "loss": 2.1406,
+ "step": 127
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990075348590839,
+ "loss": 1.9379,
+ "step": 128
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019989906449810775,
+ "loss": 1.9781,
+ "step": 129
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989736126687963,
+ "loss": 1.973,
+ "step": 130
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989564379246683,
+ "loss": 1.6825,
+ "step": 131
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989391207511428,
+ "loss": 2.0843,
+ "step": 132
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989216611506887,
+ "loss": 1.8547,
+ "step": 133
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989040591257952,
+ "loss": 1.7626,
+ "step": 134
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.0001998886314678972,
+ "loss": 2.0531,
+ "step": 135
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988684278127497,
+ "loss": 2.0031,
+ "step": 136
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988503985296773,
+ "loss": 1.9342,
+ "step": 137
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988322268323268,
+ "loss": 2.3297,
+ "step": 138
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988139127232878,
+ "loss": 2.3401,
+ "step": 139
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987954562051725,
+ "loss": 1.8983,
+ "step": 140
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.0001998776857280612,
+ "loss": 2.0621,
+ "step": 141
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987581159522578,
+ "loss": 2.0574,
+ "step": 142
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987392322227824,
+ "loss": 1.9516,
+ "step": 143
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987202060948783,
+ "loss": 2.1402,
+ "step": 144
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987010375712577,
+ "loss": 1.8903,
+ "step": 145
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986817266546539,
+ "loss": 1.8248,
+ "step": 146
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986622733478204,
+ "loss": 1.9877,
+ "step": 147
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986426776535306,
+ "loss": 1.6272,
+ "step": 148
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986229395745785,
+ "loss": 1.8605,
+ "step": 149
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986030591137783,
+ "loss": 1.6848,
+ "step": 150
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985830362739647,
+ "loss": 2.1922,
+ "step": 151
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998562871057992,
+ "loss": 2.0238,
+ "step": 152
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998542563468736,
+ "loss": 2.2246,
+ "step": 153
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985221135090914,
+ "loss": 1.9438,
+ "step": 154
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019985015211819744,
+ "loss": 2.2136,
+ "step": 155
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998480786490321,
+ "loss": 2.4563,
+ "step": 156
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984599094370874,
+ "loss": 2.2138,
+ "step": 157
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984388900252503,
+ "loss": 2.2679,
+ "step": 158
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984177282578064,
+ "loss": 1.9537,
+ "step": 159
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998396424137773,
+ "loss": 2.0803,
+ "step": 160
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998374977668188,
+ "loss": 2.0282,
+ "step": 161
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019983533888521087,
+ "loss": 2.0157,
+ "step": 162
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998331657692613,
+ "loss": 1.7837,
+ "step": 163
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019983097841928,
+ "loss": 2.1556,
+ "step": 164
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982877683557879,
+ "loss": 2.1447,
+ "step": 165
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982656101847162,
+ "loss": 2.4139,
+ "step": 166
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998243309682743,
+ "loss": 1.6788,
+ "step": 167
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982208668530493,
+ "loss": 1.9008,
+ "step": 168
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998198281698834,
+ "loss": 2.173,
+ "step": 169
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019981755542233177,
+ "loss": 2.1837,
+ "step": 170
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981526844297404,
+ "loss": 2.0639,
+ "step": 171
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981296723213632,
+ "loss": 2.3864,
+ "step": 172
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981065179014673,
+ "loss": 1.923,
+ "step": 173
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980832211733535,
+ "loss": 1.9192,
+ "step": 174
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980597821403438,
+ "loss": 2.0335,
+ "step": 175
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.000199803620080578,
+ "loss": 1.8172,
+ "step": 176
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.0001998012477173024,
+ "loss": 2.0294,
+ "step": 177
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979886112454586,
+ "loss": 2.2889,
+ "step": 178
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979646030264867,
+ "loss": 1.8498,
+ "step": 179
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997940452519531,
+ "loss": 2.0797,
+ "step": 180
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997916159728035,
+ "loss": 2.2356,
+ "step": 181
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997891724655462,
+ "loss": 2.1187,
+ "step": 182
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978671473052964,
+ "loss": 1.9301,
+ "step": 183
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978424276810423,
+ "loss": 1.8582,
+ "step": 184
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997817565786224,
+ "loss": 2.144,
+ "step": 185
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977925616243862,
+ "loss": 2.0595,
+ "step": 186
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977674151990945,
+ "loss": 1.9104,
+ "step": 187
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977421265139332,
+ "loss": 1.9727,
+ "step": 188
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977166955725088,
+ "loss": 1.8727,
+ "step": 189
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997691122378447,
+ "loss": 2.0611,
+ "step": 190
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997665406935394,
+ "loss": 2.0745,
+ "step": 191
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997639549247016,
+ "loss": 1.9974,
+ "step": 192
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019976135493169996,
+ "loss": 1.9856,
+ "step": 193
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975874071490526,
+ "loss": 1.778,
+ "step": 194
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975611227469016,
+ "loss": 1.8347,
+ "step": 195
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997534696114294,
+ "loss": 1.5555,
+ "step": 196
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019975081272549989,
+ "loss": 1.5625,
+ "step": 197
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974814161728032,
+ "loss": 1.9997,
+ "step": 198
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974545628715157,
+ "loss": 1.9523,
+ "step": 199
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974275673549654,
+ "loss": 2.1557,
+ "step": 200
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974004296270006,
+ "loss": 1.8306,
+ "step": 201
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973731496914914,
+ "loss": 2.0051,
+ "step": 202
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973457275523264,
+ "loss": 2.201,
+ "step": 203
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997318163213416,
+ "loss": 2.2446,
+ "step": 204
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972904566786903,
+ "loss": 2.1172,
+ "step": 205
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972626079520995,
+ "loss": 1.9849,
+ "step": 206
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972346170376142,
+ "loss": 1.9774,
+ "step": 207
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.0001997206483939225,
+ "loss": 1.7625,
+ "step": 208
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971782086609436,
+ "loss": 2.2346,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 2.00066876411438,
+ "eval_runtime": 282.7648,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971497912068013,
+ "loss": 2.4185,
+ "step": 210
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971212315808497,
+ "loss": 1.946,
+ "step": 211
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019970925297871605,
+ "loss": 2.0049,
+ "step": 212
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970636858298267,
+ "loss": 1.9545,
+ "step": 213
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970346997129598,
+ "loss": 1.9636,
+ "step": 214
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970055714406938,
+ "loss": 1.9068,
+ "step": 215
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969763010171807,
+ "loss": 1.5749,
+ "step": 216
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969468884465942,
+ "loss": 1.7676,
+ "step": 217
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996917333733128,
+ "loss": 2.0329,
+ "step": 218
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996887636880996,
+ "loss": 1.9307,
+ "step": 219
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019968577978944323,
+ "loss": 2.134,
+ "step": 220
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019968278167776908,
+ "loss": 2.0911,
+ "step": 221
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967976935350467,
+ "loss": 2.5057,
+ "step": 222
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.0001996767428170795,
+ "loss": 1.9267,
+ "step": 223
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967370206892503,
+ "loss": 2.3569,
+ "step": 224
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967064710947488,
+ "loss": 1.992,
+ "step": 225
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966757793916454,
+ "loss": 2.01,
+ "step": 226
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966449455843165,
+ "loss": 1.8037,
+ "step": 227
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966139696771587,
+ "loss": 2.2498,
+ "step": 228
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019965828516745876,
+ "loss": 1.6563,
+ "step": 229
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996551591581041,
+ "loss": 1.979,
+ "step": 230
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996520189400975,
+ "loss": 2.1553,
+ "step": 231
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996488645138867,
+ "loss": 1.8743,
+ "step": 232
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964569587992148,
+ "loss": 2.1907,
+ "step": 233
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964251303865362,
+ "loss": 2.0644,
+ "step": 234
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019963931599053692,
+ "loss": 2.1721,
+ "step": 235
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996361047360272,
+ "loss": 2.2267,
+ "step": 236
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996328792755823,
+ "loss": 1.9445,
+ "step": 237
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019962963960966213,
+ "loss": 2.2003,
+ "step": 238
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996263857387286,
+ "loss": 2.3114,
+ "step": 239
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996231176632456,
+ "loss": 1.8553,
+ "step": 240
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961983538367914,
+ "loss": 2.1349,
+ "step": 241
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961653890049715,
+ "loss": 1.8784,
+ "step": 242
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996132282141697,
+ "loss": 2.0118,
+ "step": 243
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960990332516874,
+ "loss": 1.9938,
+ "step": 244
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960656423396834,
+ "loss": 2.2582,
+ "step": 245
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019960321094104465,
+ "loss": 2.1807,
+ "step": 246
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959984344687578,
+ "loss": 1.9084,
+ "step": 247
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959646175194174,
+ "loss": 2.2879,
+ "step": 248
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995930658567248,
+ "loss": 1.942,
+ "step": 249
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958965576170908,
+ "loss": 2.1313,
+ "step": 250
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958623146738088,
+ "loss": 2.3202,
+ "step": 251
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995827929742283,
+ "loss": 1.7832,
+ "step": 252
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957934028274162,
+ "loss": 1.7103,
+ "step": 253
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957587339341321,
+ "loss": 1.9912,
+ "step": 254
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995723923067373,
+ "loss": 1.6686,
+ "step": 255
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956889702321023,
+ "loss": 1.966,
+ "step": 256
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956538754333034,
+ "loss": 2.2287,
+ "step": 257
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956186386759804,
+ "loss": 1.4866,
+ "step": 258
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995583259965157,
+ "loss": 1.9599,
+ "step": 259
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019955477393058773,
+ "loss": 1.9273,
+ "step": 260
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995512076703206,
+ "loss": 1.847,
+ "step": 261
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019954762721622279,
+ "loss": 2.0535,
+ "step": 262
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995440325688048,
+ "loss": 2.4403,
+ "step": 263
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019954042372857908,
+ "loss": 1.8712,
+ "step": 264
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953680069606026,
+ "loss": 2.1837,
+ "step": 265
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953316347176488,
+ "loss": 2.0398,
+ "step": 266
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995295120562115,
+ "loss": 2.1135,
+ "step": 267
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952584644992075,
+ "loss": 2.0358,
+ "step": 268
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952216665341526,
+ "loss": 2.3282,
+ "step": 269
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995184726672197,
+ "loss": 1.9741,
+ "step": 270
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951476449186074,
+ "loss": 1.7523,
+ "step": 271
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951104212786712,
+ "loss": 2.1509,
+ "step": 272
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001995073055757695,
+ "loss": 2.0865,
+ "step": 273
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019950355483610067,
+ "loss": 1.8972,
+ "step": 274
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019949978990939542,
+ "loss": 2.4693,
+ "step": 275
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994960107961905,
+ "loss": 1.9307,
+ "step": 276
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994922174970248,
+ "loss": 2.0097,
+ "step": 277
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994884100124391,
+ "loss": 1.6561,
+ "step": 278
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994845883429763,
+ "loss": 2.3069,
+ "step": 279
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019948075248918124,
+ "loss": 2.0134,
+ "step": 280
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947690245160091,
+ "loss": 2.1061,
+ "step": 281
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947303823078416,
+ "loss": 2.0855,
+ "step": 282
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946915982728197,
+ "loss": 1.5672,
+ "step": 283
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.0001994652672416473,
+ "loss": 1.7289,
+ "step": 284
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946136047443522,
+ "loss": 1.9013,
+ "step": 285
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945743952620268,
+ "loss": 2.3105,
+ "step": 286
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945350439750872,
+ "loss": 2.341,
+ "step": 287
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944955508891443,
+ "loss": 1.88,
+ "step": 288
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994455916009829,
+ "loss": 1.913,
+ "step": 289
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944161393427922,
+ "loss": 1.9513,
+ "step": 290
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943762208937053,
+ "loss": 2.3331,
+ "step": 291
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943361606682597,
+ "loss": 2.3024,
+ "step": 292
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942959586721672,
+ "loss": 2.2222,
+ "step": 293
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942556149111598,
+ "loss": 2.1003,
+ "step": 294
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994215129390989,
+ "loss": 1.9038,
+ "step": 295
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941745021174282,
+ "loss": 1.6068,
+ "step": 296
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941337330962693,
+ "loss": 1.8894,
+ "step": 297
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940928223333252,
+ "loss": 2.3158,
+ "step": 298
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.0001994051769834429,
+ "loss": 2.1015,
+ "step": 299
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940105756054337,
+ "loss": 2.1519,
+ "step": 300
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939692396522127,
+ "loss": 1.7233,
+ "step": 301
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939277619806598,
+ "loss": 1.85,
+ "step": 302
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938861425966887,
+ "loss": 2.2368,
+ "step": 303
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938443815062335,
+ "loss": 1.765,
+ "step": 304
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993802478715248,
+ "loss": 1.6333,
+ "step": 305
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937604342297073,
+ "loss": 2.191,
+ "step": 306
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937182480556055,
+ "loss": 2.2402,
+ "step": 307
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019936759201989577,
+ "loss": 2.0568,
+ "step": 308
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993633450665799,
+ "loss": 2.4314,
+ "step": 309
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935908394621844,
+ "loss": 2.0556,
+ "step": 310
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935480865941894,
+ "loss": 2.0988,
+ "step": 311
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935051920679094,
+ "loss": 2.0964,
+ "step": 312
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019934621558894607,
+ "loss": 1.9365,
+ "step": 313
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993418978064979,
+ "loss": 1.6224,
+ "step": 314
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933756586006202,
+ "loss": 2.144,
+ "step": 315
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933321975025616,
+ "loss": 2.2899,
+ "step": 316
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019932885947769992,
+ "loss": 1.8865,
+ "step": 317
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.000199324485043015,
+ "loss": 2.3996,
+ "step": 318
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993200964468251,
+ "loss": 1.3858,
+ "step": 319
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019931569368975588,
+ "loss": 2.2231,
+ "step": 320
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019931127677243516,
+ "loss": 2.0537,
+ "step": 321
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930684569549264,
+ "loss": 2.1381,
+ "step": 322
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930240045956012,
+ "loss": 2.0152,
+ "step": 323
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992979410652714,
+ "loss": 2.0293,
+ "step": 324
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019929346751326228,
+ "loss": 1.7457,
+ "step": 325
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928897980417057,
+ "loss": 1.987,
+ "step": 326
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928447793863616,
+ "loss": 2.2451,
+ "step": 327
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019927996191730093,
+ "loss": 2.3312,
+ "step": 328
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992754317408087,
+ "loss": 1.8771,
+ "step": 329
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992708874098054,
+ "loss": 1.833,
+ "step": 330
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926632892493896,
+ "loss": 1.9343,
+ "step": 331
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926175628685937,
+ "loss": 2.2328,
+ "step": 332
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992571694962185,
+ "loss": 1.9916,
+ "step": 333
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992525685536704,
+ "loss": 1.9497,
+ "step": 334
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.000199247953459871,
+ "loss": 2.029,
+ "step": 335
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019924332421547835,
+ "loss": 2.0326,
+ "step": 336
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992386808211525,
+ "loss": 2.6406,
+ "step": 337
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019923402327755546,
+ "loss": 2.3811,
+ "step": 338
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922935158535129,
+ "loss": 1.6143,
+ "step": 339
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922466574520608,
+ "loss": 2.2182,
+ "step": 340
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921996575778794,
+ "loss": 2.218,
+ "step": 341
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.000199215251623767,
+ "loss": 1.8615,
+ "step": 342
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921052334381534,
+ "loss": 2.165,
+ "step": 343
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019920578091860716,
+ "loss": 2.1627,
+ "step": 344
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0001992010243488186,
+ "loss": 2.154,
+ "step": 345
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919625363512786,
+ "loss": 1.5966,
+ "step": 346
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919146877821512,
+ "loss": 2.0903,
+ "step": 347
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991866697787626,
+ "loss": 2.2322,
+ "step": 348
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019918185663745456,
+ "loss": 1.9319,
+ "step": 349
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917702935497725,
+ "loss": 2.1367,
+ "step": 350
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917218793201886,
+ "loss": 2.1767,
+ "step": 351
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019916733236926976,
+ "loss": 2.1009,
+ "step": 352
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991624626674222,
+ "loss": 2.1286,
+ "step": 353
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991575788271705,
+ "loss": 2.181,
+ "step": 354
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019915268084921101,
+ "loss": 2.12,
+ "step": 355
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019914776873424206,
+ "loss": 1.9895,
+ "step": 356
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.000199142842482964,
+ "loss": 1.9285,
+ "step": 357
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991379020960792,
+ "loss": 2.2376,
+ "step": 358
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991329475742921,
+ "loss": 2.1274,
+ "step": 359
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912797891830908,
+ "loss": 2.0043,
+ "step": 360
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912299612883852,
+ "loss": 2.022,
+ "step": 361
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019911799920659093,
+ "loss": 1.7343,
+ "step": 362
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991129881522787,
+ "loss": 2.0621,
+ "step": 363
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019910796296661632,
+ "loss": 1.5116,
+ "step": 364
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991029236503203,
+ "loss": 2.0485,
+ "step": 365
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909787020410907,
+ "loss": 1.971,
+ "step": 366
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909280262870324,
+ "loss": 1.9724,
+ "step": 367
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908772092482524,
+ "loss": 1.318,
+ "step": 368
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908262509319964,
+ "loss": 2.0539,
+ "step": 369
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019907751513455302,
+ "loss": 2.1097,
+ "step": 370
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019907239104961392,
+ "loss": 2.0632,
+ "step": 371
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906725283911296,
+ "loss": 2.1897,
+ "step": 372
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906210050378266,
+ "loss": 2.2002,
+ "step": 373
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905693404435773,
+ "loss": 1.9005,
+ "step": 374
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905175346157474,
+ "loss": 1.9873,
+ "step": 375
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904655875617233,
+ "loss": 1.7215,
+ "step": 376
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904134992889113,
+ "loss": 2.0434,
+ "step": 377
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903612698047383,
+ "loss": 2.4223,
+ "step": 378
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903088991166513,
+ "loss": 2.0837,
+ "step": 379
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902563872321172,
+ "loss": 2.2389,
+ "step": 380
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902037341586225,
+ "loss": 1.7205,
+ "step": 381
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001990150939903675,
+ "loss": 1.9577,
+ "step": 382
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019900980044748015,
+ "loss": 1.8778,
+ "step": 383
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.000199004492787955,
+ "loss": 2.2213,
+ "step": 384
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899917101254874,
+ "loss": 2.0927,
+ "step": 385
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899383512202019,
+ "loss": 2.2921,
+ "step": 386
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001989884851171301,
+ "loss": 2.2983,
+ "step": 387
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989831209986413,
+ "loss": 1.8052,
+ "step": 388
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897774276731857,
+ "loss": 1.7741,
+ "step": 389
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897235042392873,
+ "loss": 1.779,
+ "step": 390
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896694396924063,
+ "loss": 1.6924,
+ "step": 391
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896152340402509,
+ "loss": 2.036,
+ "step": 392
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019895608872905494,
+ "loss": 2.04,
+ "step": 393
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989506399451051,
+ "loss": 2.1702,
+ "step": 394
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019894517705295245,
+ "loss": 1.9429,
+ "step": 395
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893970005337584,
+ "loss": 2.0528,
+ "step": 396
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893420894715618,
+ "loss": 1.7906,
+ "step": 397
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989287037350764,
+ "loss": 2.3494,
+ "step": 398
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019892318441792138,
+ "loss": 1.7415,
+ "step": 399
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989176509964781,
+ "loss": 2.0184,
+ "step": 400
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989121034715355,
+ "loss": 1.9277,
+ "step": 401
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989065418438845,
+ "loss": 2.2168,
+ "step": 402
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019890096611431814,
+ "loss": 2.6114,
+ "step": 403
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019889537628363133,
+ "loss": 2.0713,
+ "step": 404
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888977235262104,
+ "loss": 2.2966,
+ "step": 405
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888415432208636,
+ "loss": 2.5206,
+ "step": 406
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887852219282822,
+ "loss": 2.4503,
+ "step": 407
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887287596564966,
+ "loss": 2.102,
+ "step": 408
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886721564135572,
+ "loss": 2.3275,
+ "step": 409
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886154122075343,
+ "loss": 2.0481,
+ "step": 410
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885585270465182,
+ "loss": 1.8395,
+ "step": 411
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885015009386202,
+ "loss": 2.3535,
+ "step": 412
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.000198844433389197,
+ "loss": 2.0147,
+ "step": 413
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988387025914719,
+ "loss": 2.1919,
+ "step": 414
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988329577015038,
+ "loss": 2.156,
+ "step": 415
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882719872011176,
+ "loss": 2.2672,
+ "step": 416
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882142564811694,
+ "loss": 2.3242,
+ "step": 417
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988156384863424,
+ "loss": 2.0259,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "eval_loss": 1.9941134452819824,
+ "eval_runtime": 282.533,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880983723561332,
+ "loss": 1.7039,
+ "step": 419
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880402189675678,
+ "loss": 2.1007,
+ "step": 420
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879819247060193,
+ "loss": 2.2297,
+ "step": 421
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879234895797996,
+ "loss": 1.6166,
+ "step": 422
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.000198786491359724,
+ "loss": 2.408,
+ "step": 423
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019878061967666915,
+ "loss": 1.686,
+ "step": 424
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0001987747339096527,
+ "loss": 2.0492,
+ "step": 425
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876883405951377,
+ "loss": 2.2179,
+ "step": 426
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876292012709356,
+ "loss": 1.8812,
+ "step": 427
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019875699211323528,
+ "loss": 2.2888,
+ "step": 428
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019875105001878409,
+ "loss": 2.0561,
+ "step": 429
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019874509384458725,
+ "loss": 1.9299,
+ "step": 430
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873912359149397,
+ "loss": 2.1999,
+ "step": 431
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873313926035548,
+ "loss": 1.8509,
+ "step": 432
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019872714085202503,
+ "loss": 1.8281,
+ "step": 433
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987211283673578,
+ "loss": 1.8359,
+ "step": 434
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987151018072111,
+ "loss": 2.2844,
+ "step": 435
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870906117244416,
+ "loss": 1.9397,
+ "step": 436
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870300646391824,
+ "loss": 2.302,
+ "step": 437
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869693768249661,
+ "loss": 2.1176,
+ "step": 438
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869085482904458,
+ "loss": 2.1909,
+ "step": 439
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986847579044294,
+ "loss": 2.2382,
+ "step": 440
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867864690952035,
+ "loss": 2.0988,
+ "step": 441
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867252184518878,
+ "loss": 2.2136,
+ "step": 442
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986663827123079,
+ "loss": 1.9324,
+ "step": 443
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019866022951175308,
+ "loss": 2.1274,
+ "step": 444
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019865406224440165,
+ "loss": 1.8625,
+ "step": 445
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019864788091113287,
+ "loss": 2.0009,
+ "step": 446
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0001986416855128281,
+ "loss": 2.2245,
+ "step": 447
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019863547605037063,
+ "loss": 2.0654,
+ "step": 448
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862925252464586,
+ "loss": 1.4339,
+ "step": 449
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862301493654108,
+ "loss": 2.1347,
+ "step": 450
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861676328694562,
+ "loss": 1.7029,
+ "step": 451
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861049757675088,
+ "loss": 2.0081,
+ "step": 452
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019860421780685018,
+ "loss": 1.9994,
+ "step": 453
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985979239781389,
+ "loss": 1.9325,
+ "step": 454
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019859161609151436,
+ "loss": 1.8502,
+ "step": 455
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.000198585294147876,
+ "loss": 2.3779,
+ "step": 456
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019857895814812509,
+ "loss": 2.0303,
+ "step": 457
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985726080931651,
+ "loss": 1.9898,
+ "step": 458
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019856624398390137,
+ "loss": 1.7648,
+ "step": 459
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019855986582124126,
+ "loss": 1.7822,
+ "step": 460
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985534736060942,
+ "loss": 1.9219,
+ "step": 461
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019854706733937155,
+ "loss": 2.1789,
+ "step": 462
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019854064702198675,
+ "loss": 1.9091,
+ "step": 463
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019853421265485514,
+ "loss": 1.9941,
+ "step": 464
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985277642388941,
+ "loss": 1.904,
+ "step": 465
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019852130177502316,
+ "loss": 1.6299,
+ "step": 466
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985148252641636,
+ "loss": 1.7712,
+ "step": 467
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019850833470723886,
+ "loss": 1.6825,
+ "step": 468
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985018301051744,
+ "loss": 1.7408,
+ "step": 469
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019849531145889758,
+ "loss": 2.0622,
+ "step": 470
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019848877876933784,
+ "loss": 1.5699,
+ "step": 471
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0001984822320374266,
+ "loss": 2.0253,
+ "step": 472
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019847567126409724,
+ "loss": 2.2186,
+ "step": 473
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019846909645028523,
+ "loss": 2.0872,
+ "step": 474
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.000198462507596928,
+ "loss": 1.9362,
+ "step": 475
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019845590470496497,
+ "loss": 2.4109,
+ "step": 476
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844928777533753,
+ "loss": 2.2626,
+ "step": 477
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844265680898918,
+ "loss": 2.0874,
+ "step": 478
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984360118068653,
+ "loss": 2.1606,
+ "step": 479
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984293527699133,
+ "loss": 2.063,
+ "step": 480
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019842267969908265,
+ "loss": 1.9065,
+ "step": 481
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984159925953248,
+ "loss": 1.9511,
+ "step": 482
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840929145959317,
+ "loss": 2.056,
+ "step": 483
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840257629284317,
+ "loss": 2.2353,
+ "step": 484
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019839584709603226,
+ "loss": 1.9401,
+ "step": 485
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001983891038701199,
+ "loss": 1.9648,
+ "step": 486
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019838234661606748,
+ "loss": 1.753,
+ "step": 487
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019837557533483846,
+ "loss": 1.7805,
+ "step": 488
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836879002739827,
+ "loss": 2.192,
+ "step": 489
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836199069471437,
+ "loss": 1.9112,
+ "step": 490
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019835517733775615,
+ "loss": 2.0119,
+ "step": 491
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0001983483499574951,
+ "loss": 1.8932,
+ "step": 492
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019834150855490464,
+ "loss": 1.5968,
+ "step": 493
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019833465313096017,
+ "loss": 2.1493,
+ "step": 494
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019832778368663917,
+ "loss": 1.8863,
+ "step": 495
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.000198320900222921,
+ "loss": 2.2134,
+ "step": 496
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019831400274078717,
+ "loss": 2.2831,
+ "step": 497
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019830709124122112,
+ "loss": 2.0266,
+ "step": 498
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001983001657252082,
+ "loss": 2.3392,
+ "step": 499
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019829322619373588,
+ "loss": 1.8426,
+ "step": 500
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019828627264779363,
+ "loss": 2.0742,
+ "step": 501
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001982793050883728,
+ "loss": 1.9578,
+ "step": 502
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019827232351646686,
+ "loss": 2.0863,
+ "step": 503
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982653279330712,
+ "loss": 2.2881,
+ "step": 504
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019825831833918323,
+ "loss": 1.8869,
+ "step": 505
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982512947358024,
+ "loss": 1.8997,
+ "step": 506
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019824425712393012,
+ "loss": 1.8945,
+ "step": 507
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019823720550456977,
+ "loss": 1.9496,
+ "step": 508
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982301398787268,
+ "loss": 2.1066,
+ "step": 509
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019822306024740852,
+ "loss": 1.958,
+ "step": 510
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019821596661162447,
+ "loss": 2.1112,
+ "step": 511
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019820885897238596,
+ "loss": 2.1012,
+ "step": 512
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001982017373307064,
+ "loss": 2.2623,
+ "step": 513
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019819460168760117,
+ "loss": 2.5058,
+ "step": 514
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981874520440877,
+ "loss": 2.1367,
+ "step": 515
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019818028840118532,
+ "loss": 2.2743,
+ "step": 516
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019817311075991543,
+ "loss": 1.5517,
+ "step": 517
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981659191213014,
+ "loss": 1.9569,
+ "step": 518
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815871348636863,
+ "loss": 2.0566,
+ "step": 519
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815149385614444,
+ "loss": 1.8859,
+ "step": 520
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019814426023165825,
+ "loss": 2.0298,
+ "step": 521
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019813701261394136,
+ "loss": 2.0614,
+ "step": 522
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812975100402715,
+ "loss": 2.221,
+ "step": 523
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812247540295096,
+ "loss": 2.1255,
+ "step": 524
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019811518581175014,
+ "loss": 2.1885,
+ "step": 525
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.000198107882231464,
+ "loss": 2.3918,
+ "step": 526
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019810056466313392,
+ "loss": 2.2759,
+ "step": 527
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019809323310780318,
+ "loss": 1.9727,
+ "step": 528
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980858875665171,
+ "loss": 2.0417,
+ "step": 529
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019807852804032305,
+ "loss": 1.645,
+ "step": 530
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980711545302703,
+ "loss": 1.7943,
+ "step": 531
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019806376703741015,
+ "loss": 1.8844,
+ "step": 532
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019805636556279588,
+ "loss": 2.1128,
+ "step": 533
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980489501074828,
+ "loss": 2.0272,
+ "step": 534
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019804152067252816,
+ "loss": 2.0916,
+ "step": 535
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019803407725899131,
+ "loss": 1.7287,
+ "step": 536
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019802661986793342,
+ "loss": 2.0667,
+ "step": 537
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801914850041784,
+ "loss": 2.4016,
+ "step": 538
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801166315750978,
+ "loss": 1.8557,
+ "step": 539
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0001980041638402765,
+ "loss": 1.8072,
+ "step": 540
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019799665054978722,
+ "loss": 2.2252,
+ "step": 541
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798912328711322,
+ "loss": 2.1377,
+ "step": 542
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798158205332764,
+ "loss": 2.0306,
+ "step": 543
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019797402684950576,
+ "loss": 1.7428,
+ "step": 544
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019796645767672477,
+ "loss": 2.0843,
+ "step": 545
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795887453606388,
+ "loss": 1.9175,
+ "step": 546
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795127742860423,
+ "loss": 1.6673,
+ "step": 547
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0001979436663554291,
+ "loss": 1.5553,
+ "step": 548
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019793604131762357,
+ "loss": 1.604,
+ "step": 549
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792840231627482,
+ "loss": 2.023,
+ "step": 550
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792074935247206,
+ "loss": 1.8399,
+ "step": 551
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019791308242730638,
+ "loss": 1.8579,
+ "step": 552
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019790540154187094,
+ "loss": 2.2135,
+ "step": 553
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019789770669726087,
+ "loss": 1.7894,
+ "step": 554
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788999789457326,
+ "loss": 2.1723,
+ "step": 555
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788227513490723,
+ "loss": 2.0881,
+ "step": 556
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019787453841936393,
+ "loss": 1.7181,
+ "step": 557
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019786678774904638,
+ "loss": 1.8725,
+ "step": 558
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785902312505964,
+ "loss": 2.0544,
+ "step": 559
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785124454851084,
+ "loss": 1.7503,
+ "step": 560
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.000197843452020509,
+ "loss": 2.01,
+ "step": 561
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019783564554216518,
+ "loss": 1.748,
+ "step": 562
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978278251145924,
+ "loss": 2.0866,
+ "step": 563
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978199907389057,
+ "loss": 1.6046,
+ "step": 564
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019781214241622208,
+ "loss": 1.9222,
+ "step": 565
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019780428014766051,
+ "loss": 2.2003,
+ "step": 566
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019779640393434206,
+ "loss": 2.0534,
+ "step": 567
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001977885137773896,
+ "loss": 1.8609,
+ "step": 568
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019778060967792817,
+ "loss": 2.0666,
+ "step": 569
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019777269163708468,
+ "loss": 1.9512,
+ "step": 570
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019776475965598814,
+ "loss": 1.8349,
+ "step": 571
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977568137357694,
+ "loss": 2.0507,
+ "step": 572
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019774885387756138,
+ "loss": 1.7588,
+ "step": 573
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.000197740880082499,
+ "loss": 2.0981,
+ "step": 574
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019773289235171918,
+ "loss": 2.0953,
+ "step": 575
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019772489068636077,
+ "loss": 2.0678,
+ "step": 576
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019771687508756466,
+ "loss": 2.0136,
+ "step": 577
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977088455564736,
+ "loss": 1.9781,
+ "step": 578
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019770080209423254,
+ "loss": 2.2185,
+ "step": 579
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019769274470198827,
+ "loss": 1.8076,
+ "step": 580
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019768467338088957,
+ "loss": 1.6888,
+ "step": 581
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019767658813208726,
+ "loss": 2.1273,
+ "step": 582
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0001976684889567341,
+ "loss": 2.3232,
+ "step": 583
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019766037585598487,
+ "loss": 2.366,
+ "step": 584
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019765224883099635,
+ "loss": 1.8939,
+ "step": 585
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019764410788292722,
+ "loss": 2.0162,
+ "step": 586
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019763595301293822,
+ "loss": 2.2752,
+ "step": 587
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976277842221921,
+ "loss": 1.9461,
+ "step": 588
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976196015118535,
+ "loss": 1.9999,
+ "step": 589
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976114048830891,
+ "loss": 2.0169,
+ "step": 590
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019760319433706757,
+ "loss": 2.1838,
+ "step": 591
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019759496987495955,
+ "loss": 2.3513,
+ "step": 592
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975867314979377,
+ "loss": 1.9915,
+ "step": 593
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975784792071766,
+ "loss": 2.1973,
+ "step": 594
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019757021300385286,
+ "loss": 2.3112,
+ "step": 595
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019756193288914507,
+ "loss": 2.0992,
+ "step": 596
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019755363886423376,
+ "loss": 2.4266,
+ "step": 597
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019754533093030148,
+ "loss": 1.7649,
+ "step": 598
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975370090885328,
+ "loss": 1.7573,
+ "step": 599
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019752867334011423,
+ "loss": 1.7949,
+ "step": 600
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975203236862342,
+ "loss": 2.0229,
+ "step": 601
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019751196012808325,
+ "loss": 2.0519,
+ "step": 602
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019750358266685383,
+ "loss": 2.0829,
+ "step": 603
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019749519130374038,
+ "loss": 2.0153,
+ "step": 604
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019748678603993933,
+ "loss": 1.8594,
+ "step": 605
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019747836687664908,
+ "loss": 2.1385,
+ "step": 606
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746993381507003,
+ "loss": 2.1317,
+ "step": 607
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746148685640451,
+ "loss": 1.1676,
+ "step": 608
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974530260018569,
+ "loss": 2.2856,
+ "step": 609
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974445512526336,
+ "loss": 2.1973,
+ "step": 610
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019743606260994278,
+ "loss": 1.6912,
+ "step": 611
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019742756007499486,
+ "loss": 1.8091,
+ "step": 612
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741904364900208,
+ "loss": 2.0108,
+ "step": 613
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741051333317867,
+ "loss": 2.1061,
+ "step": 614
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019740196912874087,
+ "loss": 1.8934,
+ "step": 615
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019739341103690693,
+ "loss": 1.8599,
+ "step": 616
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019738483905889703,
+ "loss": 2.0025,
+ "step": 617
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019737625319593335,
+ "loss": 1.8247,
+ "step": 618
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019736765344924005,
+ "loss": 2.222,
+ "step": 619
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019735903982004324,
+ "loss": 2.116,
+ "step": 620
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973504123095711,
+ "loss": 1.9183,
+ "step": 621
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973417709190536,
+ "loss": 2.1507,
+ "step": 622
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019733311564972296,
+ "loss": 1.7899,
+ "step": 623
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019732444650281315,
+ "loss": 2.1005,
+ "step": 624
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973157634795602,
+ "loss": 2.2391,
+ "step": 625
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019730706658120214,
+ "loss": 1.9466,
+ "step": 626
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.000197298355808979,
+ "loss": 1.9854,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "eval_loss": 1.9957869052886963,
+ "eval_runtime": 282.5544,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019728963116413266,
+ "loss": 2.1877,
+ "step": 628
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019728089264790712,
+ "loss": 2.2194,
+ "step": 629
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019727214026154827,
+ "loss": 1.9631,
+ "step": 630
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019726337400630405,
+ "loss": 2.3506,
+ "step": 631
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019725459388342432,
+ "loss": 2.0543,
+ "step": 632
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972457998941609,
+ "loss": 2.0402,
+ "step": 633
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019723699203976766,
+ "loss": 1.9316,
+ "step": 634
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972281703215004,
+ "loss": 2.2024,
+ "step": 635
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721933474061692,
+ "loss": 1.6776,
+ "step": 636
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721048529837694,
+ "loss": 1.9757,
+ "step": 637
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019720162199604222,
+ "loss": 1.7631,
+ "step": 638
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019719274483487648,
+ "loss": 2.34,
+ "step": 639
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.0001971838538161454,
+ "loss": 1.8469,
+ "step": 640
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019717494894111662,
+ "loss": 2.3151,
+ "step": 641
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019716603021105987,
+ "loss": 2.0661,
+ "step": 642
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019715709762724667,
+ "loss": 2.0408,
+ "step": 643
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019714815119095062,
+ "loss": 1.9848,
+ "step": 644
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019713919090344736,
+ "loss": 2.3134,
+ "step": 645
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019713021676601438,
+ "loss": 2.4947,
+ "step": 646
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001971212287799312,
+ "loss": 2.0515,
+ "step": 647
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019711222694647932,
+ "loss": 2.6216,
+ "step": 648
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019710321126694216,
+ "loss": 1.6517,
+ "step": 649
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001970941817426052,
+ "loss": 2.0408,
+ "step": 650
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019708513837475588,
+ "loss": 1.8841,
+ "step": 651
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019707608116468356,
+ "loss": 2.1966,
+ "step": 652
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019706701011367955,
+ "loss": 1.7587,
+ "step": 653
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970579252230373,
+ "loss": 2.2196,
+ "step": 654
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019704882649405198,
+ "loss": 1.8146,
+ "step": 655
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703971392802098,
+ "loss": 2.2932,
+ "step": 656
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703058752624353,
+ "loss": 1.923,
+ "step": 657
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970214472900208,
+ "loss": 2.2393,
+ "step": 658
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019701229322065605,
+ "loss": 1.7338,
+ "step": 659
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019700312531945442,
+ "loss": 1.7859,
+ "step": 660
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019699394358772306,
+ "loss": 2.2719,
+ "step": 661
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019698474802677107,
+ "loss": 1.576,
+ "step": 662
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019697553863790956,
+ "loss": 2.3333,
+ "step": 663
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019696631542245156,
+ "loss": 2.3508,
+ "step": 664
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019695707838171216,
+ "loss": 2.1876,
+ "step": 665
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019694782751700828,
+ "loss": 1.4863,
+ "step": 666
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019693856282965898,
+ "loss": 1.8948,
+ "step": 667
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019692928432098512,
+ "loss": 1.6867,
+ "step": 668
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691999199230963,
+ "loss": 1.7682,
+ "step": 669
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691068584495742,
+ "loss": 2.0914,
+ "step": 670
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019690136588025535,
+ "loss": 2.1413,
+ "step": 671
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019689203209953223,
+ "loss": 2.1275,
+ "step": 672
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.0001968826845041188,
+ "loss": 1.9556,
+ "step": 673
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019687332309534792,
+ "loss": 2.2209,
+ "step": 674
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019686394787455424,
+ "loss": 1.9853,
+ "step": 675
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019685455884307454,
+ "loss": 2.0877,
+ "step": 676
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019684515600224743,
+ "loss": 2.1607,
+ "step": 677
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019683573935341358,
+ "loss": 2.2664,
+ "step": 678
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019682630889791556,
+ "loss": 1.8527,
+ "step": 679
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.000196816864637098,
+ "loss": 1.8417,
+ "step": 680
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019680740657230738,
+ "loss": 1.9853,
+ "step": 681
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019679793470489228,
+ "loss": 1.8419,
+ "step": 682
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019678844903620317,
+ "loss": 1.9971,
+ "step": 683
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019677894956759246,
+ "loss": 1.9843,
+ "step": 684
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019676943630041462,
+ "loss": 2.376,
+ "step": 685
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675990923602598,
+ "loss": 2.1558,
+ "step": 686
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675036837578494,
+ "loss": 1.5752,
+ "step": 687
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001967408137210518,
+ "loss": 1.6704,
+ "step": 688
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019673124527318881,
+ "loss": 2.1389,
+ "step": 689
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019672166303356028,
+ "loss": 2.126,
+ "step": 690
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019671206700353237,
+ "loss": 1.9402,
+ "step": 691
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019670245718447335,
+ "loss": 1.6701,
+ "step": 692
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019669283357775328,
+ "loss": 1.8134,
+ "step": 693
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001966831961847443,
+ "loss": 2.1642,
+ "step": 694
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019667354500682054,
+ "loss": 1.8455,
+ "step": 695
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.000196663880045358,
+ "loss": 1.9646,
+ "step": 696
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966542013017347,
+ "loss": 1.9855,
+ "step": 697
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019664450877733062,
+ "loss": 1.7029,
+ "step": 698
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019663480247352773,
+ "loss": 1.9789,
+ "step": 699
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966250823917099,
+ "loss": 1.8751,
+ "step": 700
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019661534853326301,
+ "loss": 2.3644,
+ "step": 701
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019660560089957492,
+ "loss": 1.8006,
+ "step": 702
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001965958394920354,
+ "loss": 2.2799,
+ "step": 703
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019658606431203622,
+ "loss": 1.9258,
+ "step": 704
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965762753609711,
+ "loss": 1.9521,
+ "step": 705
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019656647264023575,
+ "loss": 1.9675,
+ "step": 706
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019655665615122783,
+ "loss": 2.3686,
+ "step": 707
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019654682589534693,
+ "loss": 2.1448,
+ "step": 708
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019653698187399466,
+ "loss": 2.2475,
+ "step": 709
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965271240885745,
+ "loss": 1.9417,
+ "step": 710
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965172525404921,
+ "loss": 2.154,
+ "step": 711
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019650736723115475,
+ "loss": 2.0646,
+ "step": 712
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019649746816197196,
+ "loss": 2.235,
+ "step": 713
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019648755533435518,
+ "loss": 1.7122,
+ "step": 714
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019647762874971765,
+ "loss": 2.0635,
+ "step": 715
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019646768840947474,
+ "loss": 1.8904,
+ "step": 716
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019645773431504373,
+ "loss": 1.608,
+ "step": 717
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019644776646784388,
+ "loss": 2.2307,
+ "step": 718
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.0001964377848692963,
+ "loss": 2.176,
+ "step": 719
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019642778952082426,
+ "loss": 2.1984,
+ "step": 720
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001964177804238528,
+ "loss": 2.2625,
+ "step": 721
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019640775757980903,
+ "loss": 2.3142,
+ "step": 722
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019639772099012197,
+ "loss": 2.2366,
+ "step": 723
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019638767065622266,
+ "loss": 1.7823,
+ "step": 724
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.000196377606579544,
+ "loss": 2.0677,
+ "step": 725
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019636752876152095,
+ "loss": 1.3337,
+ "step": 726
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019635743720359037,
+ "loss": 2.055,
+ "step": 727
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001963473319071911,
+ "loss": 1.9888,
+ "step": 728
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019633721287376393,
+ "loss": 1.9258,
+ "step": 729
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019632708010475165,
+ "loss": 2.3768,
+ "step": 730
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001963169336015989,
+ "loss": 1.993,
+ "step": 731
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019630677336575242,
+ "loss": 2.1989,
+ "step": 732
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001962965993986608,
+ "loss": 2.1216,
+ "step": 733
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019628641170177464,
+ "loss": 2.2217,
+ "step": 734
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019627621027654648,
+ "loss": 1.8809,
+ "step": 735
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019626599512443077,
+ "loss": 2.0864,
+ "step": 736
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019625576624688406,
+ "loss": 2.0627,
+ "step": 737
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019624552364536473,
+ "loss": 2.1347,
+ "step": 738
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019623526732133315,
+ "loss": 1.9998,
+ "step": 739
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019622499727625162,
+ "loss": 2.1998,
+ "step": 740
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019621471351158443,
+ "loss": 1.974,
+ "step": 741
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019620441602879787,
+ "loss": 1.9425,
+ "step": 742
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019619410482936008,
+ "loss": 2.6227,
+ "step": 743
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019618377991474124,
+ "loss": 2.1209,
+ "step": 744
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019617344128641345,
+ "loss": 2.0606,
+ "step": 745
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019616308894585078,
+ "loss": 2.296,
+ "step": 746
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019615272289452923,
+ "loss": 2.0415,
+ "step": 747
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961423431339268,
+ "loss": 1.9516,
+ "step": 748
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961319496655234,
+ "loss": 2.0468,
+ "step": 749
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961215424908009,
+ "loss": 1.877,
+ "step": 750
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961111216112432,
+ "loss": 1.8129,
+ "step": 751
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019610068702833596,
+ "loss": 1.9984,
+ "step": 752
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019609023874356707,
+ "loss": 1.9013,
+ "step": 753
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019607977675842615,
+ "loss": 2.0546,
+ "step": 754
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019606930107440485,
+ "loss": 2.2817,
+ "step": 755
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960588116929968,
+ "loss": 2.0578,
+ "step": 756
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019604830861569755,
+ "loss": 2.3521,
+ "step": 757
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019603779184400457,
+ "loss": 2.0392,
+ "step": 758
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960272613794174,
+ "loss": 1.9863,
+ "step": 759
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019601671722343738,
+ "loss": 2.1889,
+ "step": 760
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960061593775679,
+ "loss": 2.0908,
+ "step": 761
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001959955878433143,
+ "loss": 1.986,
+ "step": 762
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019598500262218386,
+ "loss": 2.0339,
+ "step": 763
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019597440371568574,
+ "loss": 2.0958,
+ "step": 764
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.0001959637911253312,
+ "loss": 1.9866,
+ "step": 765
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019595316485263327,
+ "loss": 2.2228,
+ "step": 766
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019594252489910706,
+ "loss": 1.915,
+ "step": 767
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019593187126626965,
+ "loss": 2.0741,
+ "step": 768
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019592120395563994,
+ "loss": 2.5346,
+ "step": 769
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019591052296873888,
+ "loss": 2.4908,
+ "step": 770
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019589982830708937,
+ "loss": 2.1042,
+ "step": 771
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019588911997221625,
+ "loss": 1.8676,
+ "step": 772
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958783979656462,
+ "loss": 1.9152,
+ "step": 773
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019586766228890806,
+ "loss": 1.7784,
+ "step": 774
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958569129435324,
+ "loss": 2.0784,
+ "step": 775
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958461499310519,
+ "loss": 1.7262,
+ "step": 776
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019583537325300118,
+ "loss": 2.4154,
+ "step": 777
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019582458291091663,
+ "loss": 2.3185,
+ "step": 778
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019581377890633684,
+ "loss": 2.0981,
+ "step": 779
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019580296124080212,
+ "loss": 1.8952,
+ "step": 780
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019579212991585493,
+ "loss": 1.7208,
+ "step": 781
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019578128493303955,
+ "loss": 2.0209,
+ "step": 782
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019577042629390217,
+ "loss": 2.1867,
+ "step": 783
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957595539999911,
+ "loss": 2.0805,
+ "step": 784
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019574866805285645,
+ "loss": 2.0451,
+ "step": 785
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019573776845405028,
+ "loss": 2.2056,
+ "step": 786
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957268552051267,
+ "loss": 2.0773,
+ "step": 787
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019571592830764165,
+ "loss": 2.2036,
+ "step": 788
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019570498776315309,
+ "loss": 1.7298,
+ "step": 789
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956940335732209,
+ "loss": 1.8931,
+ "step": 790
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956830657394069,
+ "loss": 2.1567,
+ "step": 791
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019567208426327488,
+ "loss": 1.9471,
+ "step": 792
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019566108914639054,
+ "loss": 1.8916,
+ "step": 793
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019565008039032158,
+ "loss": 2.0111,
+ "step": 794
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019563905799663752,
+ "loss": 2.1374,
+ "step": 795
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019562802196691003,
+ "loss": 2.3083,
+ "step": 796
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019561697230271254,
+ "loss": 2.0381,
+ "step": 797
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001956059090056205,
+ "loss": 2.1909,
+ "step": 798
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019559483207721133,
+ "loss": 1.9893,
+ "step": 799
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955837415190643,
+ "loss": 2.3178,
+ "step": 800
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955726373327607,
+ "loss": 2.0815,
+ "step": 801
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019556151951988376,
+ "loss": 1.6012,
+ "step": 802
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019555038808201865,
+ "loss": 1.4965,
+ "step": 803
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019553924302075242,
+ "loss": 2.3069,
+ "step": 804
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019552808433767415,
+ "loss": 2.2388,
+ "step": 805
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019551691203437482,
+ "loss": 2.5662,
+ "step": 806
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019550572611244738,
+ "loss": 1.9419,
+ "step": 807
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019549452657348663,
+ "loss": 2.3638,
+ "step": 808
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019548331341908947,
+ "loss": 2.1567,
+ "step": 809
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019547208665085457,
+ "loss": 1.9697,
+ "step": 810
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019546084627038268,
+ "loss": 1.9006,
+ "step": 811
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.0001954495922792764,
+ "loss": 2.304,
+ "step": 812
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.0001954383246791403,
+ "loss": 2.0494,
+ "step": 813
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019542704347158093,
+ "loss": 1.8562,
+ "step": 814
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019541574865820672,
+ "loss": 2.1041,
+ "step": 815
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019540444024062804,
+ "loss": 2.22,
+ "step": 816
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019539311822045727,
+ "loss": 1.9925,
+ "step": 817
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019538178259930869,
+ "loss": 2.3213,
+ "step": 818
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019537043337879845,
+ "loss": 2.0319,
+ "step": 819
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019535907056054475,
+ "loss": 1.8578,
+ "step": 820
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019534769414616764,
+ "loss": 1.4115,
+ "step": 821
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953363041372892,
+ "loss": 2.0731,
+ "step": 822
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019532490053553335,
+ "loss": 2.0605,
+ "step": 823
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019531348334252607,
+ "loss": 1.9044,
+ "step": 824
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953020525598951,
+ "loss": 1.7405,
+ "step": 825
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001952906081892703,
+ "loss": 1.898,
+ "step": 826
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019527915023228332,
+ "loss": 1.9696,
+ "step": 827
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019526767869056788,
+ "loss": 2.0469,
+ "step": 828
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019525619356575952,
+ "loss": 2.0307,
+ "step": 829
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019524469485949583,
+ "loss": 2.002,
+ "step": 830
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019523318257341622,
+ "loss": 1.9438,
+ "step": 831
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019522165670916207,
+ "loss": 1.535,
+ "step": 832
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001952101172683768,
+ "loss": 1.7505,
+ "step": 833
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019519856425270562,
+ "loss": 2.2248,
+ "step": 834
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019518699766379576,
+ "loss": 2.0669,
+ "step": 835
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019517541750329635,
+ "loss": 2.0268,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 1.9969017505645752,
+ "eval_runtime": 283.3157,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019516382377285848,
+ "loss": 1.6712,
+ "step": 837
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001951522164741352,
+ "loss": 2.1558,
+ "step": 838
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019514059560878138,
+ "loss": 2.1599,
+ "step": 839
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019512896117845392,
+ "loss": 1.8762,
+ "step": 840
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019511731318481168,
+ "loss": 2.0189,
+ "step": 841
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019510565162951537,
+ "loss": 1.9364,
+ "step": 842
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019509397651422769,
+ "loss": 1.7319,
+ "step": 843
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019508228784061326,
+ "loss": 1.9424,
+ "step": 844
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001950705856103386,
+ "loss": 2.277,
+ "step": 845
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019505886982507225,
+ "loss": 1.6511,
+ "step": 846
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950471404864846,
+ "loss": 1.9056,
+ "step": 847
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019503539759624798,
+ "loss": 1.5105,
+ "step": 848
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950236411560367,
+ "loss": 1.9469,
+ "step": 849
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019501187116752693,
+ "loss": 1.5012,
+ "step": 850
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019500008763239683,
+ "loss": 1.7086,
+ "step": 851
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019498829055232647,
+ "loss": 1.5586,
+ "step": 852
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019497647992899788,
+ "loss": 1.5573,
+ "step": 853
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.000194964655764095,
+ "loss": 2.0757,
+ "step": 854
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019495281805930367,
+ "loss": 1.5478,
+ "step": 855
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019494096681631172,
+ "loss": 1.7068,
+ "step": 856
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019492910203680884,
+ "loss": 1.6759,
+ "step": 857
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001949172237224867,
+ "loss": 1.4621,
+ "step": 858
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019490533187503892,
+ "loss": 1.5359,
+ "step": 859
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.000194893426496161,
+ "loss": 1.9365,
+ "step": 860
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019488150758755035,
+ "loss": 1.7089,
+ "step": 861
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019486957515090641,
+ "loss": 1.4924,
+ "step": 862
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019485762918793046,
+ "loss": 1.387,
+ "step": 863
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.0001948456697003257,
+ "loss": 1.631,
+ "step": 864
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019483369668979732,
+ "loss": 1.7953,
+ "step": 865
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019482171015805245,
+ "loss": 1.7552,
+ "step": 866
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019480971010680002,
+ "loss": 1.8313,
+ "step": 867
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019479769653775106,
+ "loss": 1.593,
+ "step": 868
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019478566945261837,
+ "loss": 1.9506,
+ "step": 869
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019477362885311682,
+ "loss": 1.9598,
+ "step": 870
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947615747409631,
+ "loss": 1.7324,
+ "step": 871
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019474950711787585,
+ "loss": 2.1208,
+ "step": 872
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947374259855757,
+ "loss": 1.4111,
+ "step": 873
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019472533134578507,
+ "loss": 1.6696,
+ "step": 874
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019471322320022849,
+ "loss": 1.6999,
+ "step": 875
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019470110155063225,
+ "loss": 2.1287,
+ "step": 876
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019468896639872468,
+ "loss": 1.874,
+ "step": 877
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019467681774623592,
+ "loss": 1.7149,
+ "step": 878
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019466465559489816,
+ "loss": 1.9563,
+ "step": 879
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019465247994644545,
+ "loss": 1.3504,
+ "step": 880
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019464029080261378,
+ "loss": 1.6176,
+ "step": 881
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019462808816514103,
+ "loss": 1.7577,
+ "step": 882
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019461587203576706,
+ "loss": 1.8054,
+ "step": 883
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019460364241623358,
+ "loss": 2.0246,
+ "step": 884
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019459139930828428,
+ "loss": 1.7645,
+ "step": 885
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945791427136648,
+ "loss": 1.9225,
+ "step": 886
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019456687263412262,
+ "loss": 1.8967,
+ "step": 887
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945545890714072,
+ "loss": 1.5287,
+ "step": 888
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945422920272699,
+ "loss": 1.5033,
+ "step": 889
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019452998150346401,
+ "loss": 2.0148,
+ "step": 890
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945176575017448,
+ "loss": 1.3706,
+ "step": 891
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001945053200238693,
+ "loss": 1.7603,
+ "step": 892
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019449296907159667,
+ "loss": 1.9884,
+ "step": 893
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019448060464668783,
+ "loss": 1.6133,
+ "step": 894
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019446822675090565,
+ "loss": 1.7885,
+ "step": 895
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019445583538601498,
+ "loss": 1.8573,
+ "step": 896
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944434305537826,
+ "loss": 1.7241,
+ "step": 897
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944310122559771,
+ "loss": 1.8942,
+ "step": 898
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944185804943691,
+ "loss": 1.7541,
+ "step": 899
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019440613527073105,
+ "loss": 1.9608,
+ "step": 900
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019439367658683745,
+ "loss": 2.0969,
+ "step": 901
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019438120444446457,
+ "loss": 2.2589,
+ "step": 902
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943687188453907,
+ "loss": 1.7335,
+ "step": 903
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019435621979139596,
+ "loss": 1.8663,
+ "step": 904
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019434370728426252,
+ "loss": 1.5627,
+ "step": 905
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943311813257743,
+ "loss": 1.6101,
+ "step": 906
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019431864191771732,
+ "loss": 1.9661,
+ "step": 907
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943060890618794,
+ "loss": 1.6487,
+ "step": 908
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019429352276005026,
+ "loss": 2.1282,
+ "step": 909
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019428094301402162,
+ "loss": 1.6944,
+ "step": 910
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019426834982558705,
+ "loss": 1.2433,
+ "step": 911
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019425574319654213,
+ "loss": 1.5735,
+ "step": 912
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019424312312868417,
+ "loss": 1.6499,
+ "step": 913
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019423048962381265,
+ "loss": 1.8366,
+ "step": 914
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019421784268372876,
+ "loss": 1.906,
+ "step": 915
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019420518231023568,
+ "loss": 1.5976,
+ "step": 916
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941925085051385,
+ "loss": 1.6722,
+ "step": 917
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019417982127024422,
+ "loss": 1.8832,
+ "step": 918
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019416712060736183,
+ "loss": 1.8865,
+ "step": 919
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019415440651830208,
+ "loss": 1.6627,
+ "step": 920
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941416790048778,
+ "loss": 1.3598,
+ "step": 921
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019412893806890357,
+ "loss": 2.0506,
+ "step": 922
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019411618371219605,
+ "loss": 1.9794,
+ "step": 923
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941034159365737,
+ "loss": 1.7851,
+ "step": 924
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001940906347438569,
+ "loss": 1.8312,
+ "step": 925
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019407784013586804,
+ "loss": 1.5167,
+ "step": 926
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019406503211443128,
+ "loss": 1.5725,
+ "step": 927
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019405221068137277,
+ "loss": 1.8857,
+ "step": 928
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019403937583852061,
+ "loss": 1.741,
+ "step": 929
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019402652758770475,
+ "loss": 1.6748,
+ "step": 930
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019401366593075706,
+ "loss": 1.7285,
+ "step": 931
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019400079086951135,
+ "loss": 1.7545,
+ "step": 932
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019398790240580333,
+ "loss": 1.4491,
+ "step": 933
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019397500054147058,
+ "loss": 1.3359,
+ "step": 934
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019396208527835263,
+ "loss": 1.9567,
+ "step": 935
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.0001939491566182909,
+ "loss": 2.0011,
+ "step": 936
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019393621456312881,
+ "loss": 1.9076,
+ "step": 937
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019392325911471155,
+ "loss": 1.5388,
+ "step": 938
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019391029027488629,
+ "loss": 1.2337,
+ "step": 939
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019389730804550211,
+ "loss": 1.5752,
+ "step": 940
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019388431242840998,
+ "loss": 1.9131,
+ "step": 941
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019387130342546284,
+ "loss": 1.4177,
+ "step": 942
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019385828103851544,
+ "loss": 1.5865,
+ "step": 943
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001938452452694245,
+ "loss": 1.6335,
+ "step": 944
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019383219612004865,
+ "loss": 1.8599,
+ "step": 945
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019381913359224842,
+ "loss": 1.3035,
+ "step": 946
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019380605768788621,
+ "loss": 1.7586,
+ "step": 947
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001937929684088264,
+ "loss": 1.7334,
+ "step": 948
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019377986575693518,
+ "loss": 1.5749,
+ "step": 949
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019376674973408075,
+ "loss": 1.874,
+ "step": 950
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019375362034213314,
+ "loss": 2.3055,
+ "step": 951
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019374047758296433,
+ "loss": 1.5801,
+ "step": 952
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001937273214584482,
+ "loss": 1.8788,
+ "step": 953
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019371415197046052,
+ "loss": 2.431,
+ "step": 954
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019370096912087897,
+ "loss": 1.4963,
+ "step": 955
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001936877729115831,
+ "loss": 1.514,
+ "step": 956
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019367456334445446,
+ "loss": 1.6099,
+ "step": 957
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019366134042137642,
+ "loss": 1.9367,
+ "step": 958
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019364810414423427,
+ "loss": 1.7384,
+ "step": 959
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019363485451491524,
+ "loss": 1.6166,
+ "step": 960
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019362159153530844,
+ "loss": 1.955,
+ "step": 961
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019360831520730482,
+ "loss": 1.4189,
+ "step": 962
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019359502553279736,
+ "loss": 1.4506,
+ "step": 963
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019358172251368087,
+ "loss": 1.7108,
+ "step": 964
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019356840615185203,
+ "loss": 1.6641,
+ "step": 965
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019355507644920952,
+ "loss": 1.7506,
+ "step": 966
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019354173340765382,
+ "loss": 2.0598,
+ "step": 967
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935283770290874,
+ "loss": 1.3494,
+ "step": 968
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019351500731541453,
+ "loss": 1.6571,
+ "step": 969
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935016242685415,
+ "loss": 1.6403,
+ "step": 970
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019348822789037637,
+ "loss": 1.7555,
+ "step": 971
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019347481818282925,
+ "loss": 2.1451,
+ "step": 972
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.000193461395147812,
+ "loss": 1.4522,
+ "step": 973
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934479587872385,
+ "loss": 1.7147,
+ "step": 974
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934345091030245,
+ "loss": 1.3909,
+ "step": 975
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019342104609708756,
+ "loss": 1.8104,
+ "step": 976
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019340756977134728,
+ "loss": 1.5221,
+ "step": 977
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.000193394080127725,
+ "loss": 1.9447,
+ "step": 978
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.0001933805771681442,
+ "loss": 1.5742,
+ "step": 979
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019336706089452996,
+ "loss": 1.5312,
+ "step": 980
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019335353130880948,
+ "loss": 1.4304,
+ "step": 981
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019333998841291177,
+ "loss": 1.8379,
+ "step": 982
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019332643220876773,
+ "loss": 1.877,
+ "step": 983
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001933128626983102,
+ "loss": 1.9627,
+ "step": 984
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932992798834739,
+ "loss": 1.7857,
+ "step": 985
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019328568376619543,
+ "loss": 1.3189,
+ "step": 986
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019327207434841333,
+ "loss": 1.9588,
+ "step": 987
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019325845163206795,
+ "loss": 1.3132,
+ "step": 988
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019324481561910163,
+ "loss": 1.6304,
+ "step": 989
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932311663114586,
+ "loss": 1.8322,
+ "step": 990
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019321750371108486,
+ "loss": 1.4192,
+ "step": 991
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001932038278199285,
+ "loss": 1.3915,
+ "step": 992
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019319013863993933,
+ "loss": 1.8433,
+ "step": 993
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931764361730692,
+ "loss": 2.1459,
+ "step": 994
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931627204212717,
+ "loss": 1.9799,
+ "step": 995
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019314899138650243,
+ "loss": 1.855,
+ "step": 996
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019313524907071887,
+ "loss": 1.4763,
+ "step": 997
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019312149347588037,
+ "loss": 2.0128,
+ "step": 998
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019310772460394814,
+ "loss": 1.6964,
+ "step": 999
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001930939424568854,
+ "loss": 1.5864,
+ "step": 1000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019308014703665712,
+ "loss": 1.8437,
+ "step": 1001
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019306633834523024,
+ "loss": 2.1677,
+ "step": 1002
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019305251638457356,
+ "loss": 1.8872,
+ "step": 1003
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930386811566578,
+ "loss": 1.7312,
+ "step": 1004
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930248326634556,
+ "loss": 1.6772,
+ "step": 1005
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019301097090694143,
+ "loss": 1.9666,
+ "step": 1006
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019299709588909165,
+ "loss": 1.8946,
+ "step": 1007
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019298320761188453,
+ "loss": 2.1784,
+ "step": 1008
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.0001929693060773003,
+ "loss": 2.0249,
+ "step": 1009
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019295539128732093,
+ "loss": 1.717,
+ "step": 1010
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019294146324393046,
+ "loss": 1.8671,
+ "step": 1011
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019292752194911464,
+ "loss": 1.8388,
+ "step": 1012
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019291356740486123,
+ "loss": 1.9111,
+ "step": 1013
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019289959961315986,
+ "loss": 1.5287,
+ "step": 1014
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.000192885618576002,
+ "loss": 1.5669,
+ "step": 1015
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019287162429538105,
+ "loss": 1.9095,
+ "step": 1016
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019285761677329232,
+ "loss": 1.9133,
+ "step": 1017
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019284359601173294,
+ "loss": 2.1099,
+ "step": 1018
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.000192829562012702,
+ "loss": 1.6303,
+ "step": 1019
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019281551477820036,
+ "loss": 1.5907,
+ "step": 1020
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019280145431023097,
+ "loss": 1.4897,
+ "step": 1021
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019278738061079845,
+ "loss": 1.7414,
+ "step": 1022
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019277329368190942,
+ "loss": 1.816,
+ "step": 1023
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019275919352557241,
+ "loss": 1.5033,
+ "step": 1024
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019274508014379777,
+ "loss": 1.7923,
+ "step": 1025
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019273095353859775,
+ "loss": 1.3094,
+ "step": 1026
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019271681371198652,
+ "loss": 1.7689,
+ "step": 1027
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.0001927026606659801,
+ "loss": 1.8019,
+ "step": 1028
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019268849440259639,
+ "loss": 1.8818,
+ "step": 1029
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019267431492385521,
+ "loss": 1.7442,
+ "step": 1030
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019266012223177824,
+ "loss": 2.045,
+ "step": 1031
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019264591632838903,
+ "loss": 1.7842,
+ "step": 1032
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019263169721571308,
+ "loss": 1.5289,
+ "step": 1033
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019261746489577765,
+ "loss": 1.6013,
+ "step": 1034
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019260321937061202,
+ "loss": 1.7912,
+ "step": 1035
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925889606422473,
+ "loss": 1.7573,
+ "step": 1036
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925746887127164,
+ "loss": 1.7368,
+ "step": 1037
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019256040358405424,
+ "loss": 1.7497,
+ "step": 1038
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019254610525829758,
+ "loss": 2.0042,
+ "step": 1039
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019253179373748504,
+ "loss": 2.0732,
+ "step": 1040
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019251746902365708,
+ "loss": 1.8878,
+ "step": 1041
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019250313111885618,
+ "loss": 1.9404,
+ "step": 1042
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019248878002512654,
+ "loss": 1.5535,
+ "step": 1043
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019247441574451432,
+ "loss": 1.9344,
+ "step": 1044
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001924600382790676,
+ "loss": 1.9696,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "eval_loss": 2.064669609069824,
+ "eval_runtime": 283.003,
+ "eval_samples_per_second": 0.728,
+ "eval_steps_per_second": 0.728,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019244564763083624,
+ "loss": 1.4577,
+ "step": 1046
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019243124380187204,
+ "loss": 2.1324,
+ "step": 1047
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019241682679422873,
+ "loss": 1.4713,
+ "step": 1048
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019240239660996177,
+ "loss": 1.7455,
+ "step": 1049
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001923879532511287,
+ "loss": 1.5372,
+ "step": 1050
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019237349671978872,
+ "loss": 2.0984,
+ "step": 1051
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923590270180031,
+ "loss": 1.5023,
+ "step": 1052
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923445441478348,
+ "loss": 2.0826,
+ "step": 1053
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019233004811134886,
+ "loss": 1.7448,
+ "step": 1054
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019231553891061208,
+ "loss": 2.0249,
+ "step": 1055
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019230101654769312,
+ "loss": 1.6144,
+ "step": 1056
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001922864810246626,
+ "loss": 1.9193,
+ "step": 1057
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019227193234359292,
+ "loss": 2.0057,
+ "step": 1058
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019225737050655842,
+ "loss": 1.9493,
+ "step": 1059
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019224279551563532,
+ "loss": 1.9545,
+ "step": 1060
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001922282073729017,
+ "loss": 1.8983,
+ "step": 1061
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019221360608043746,
+ "loss": 1.9414,
+ "step": 1062
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019219899164032447,
+ "loss": 1.8471,
+ "step": 1063
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921843640546464,
+ "loss": 1.7568,
+ "step": 1064
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019216972332548887,
+ "loss": 2.0737,
+ "step": 1065
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921550694549393,
+ "loss": 1.6109,
+ "step": 1066
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.000192140402445087,
+ "loss": 1.6684,
+ "step": 1067
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001921257222980232,
+ "loss": 1.5101,
+ "step": 1068
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019211102901584094,
+ "loss": 1.5262,
+ "step": 1069
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920963226006352,
+ "loss": 1.9757,
+ "step": 1070
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019208160305450272,
+ "loss": 2.038,
+ "step": 1071
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019206687037954224,
+ "loss": 1.4755,
+ "step": 1072
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019205212457785434,
+ "loss": 1.7406,
+ "step": 1073
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019203736565154137,
+ "loss": 1.9564,
+ "step": 1074
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920225936027077,
+ "loss": 1.823,
+ "step": 1075
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001920078084334595,
+ "loss": 1.8275,
+ "step": 1076
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919930101459048,
+ "loss": 1.7106,
+ "step": 1077
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019197819874215347,
+ "loss": 1.5958,
+ "step": 1078
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019196337422431735,
+ "loss": 2.1478,
+ "step": 1079
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919485365945101,
+ "loss": 1.7238,
+ "step": 1080
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019193368585484718,
+ "loss": 2.0758,
+ "step": 1081
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.000191918822007446,
+ "loss": 1.8403,
+ "step": 1082
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019190394505442585,
+ "loss": 1.8286,
+ "step": 1083
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019188905499790789,
+ "loss": 1.6992,
+ "step": 1084
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019187415184001503,
+ "loss": 1.8512,
+ "step": 1085
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918592355828722,
+ "loss": 1.8236,
+ "step": 1086
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918443062286061,
+ "loss": 1.6173,
+ "step": 1087
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019182936377934535,
+ "loss": 1.8593,
+ "step": 1088
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918144082372204,
+ "loss": 1.8184,
+ "step": 1089
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019179943960436358,
+ "loss": 1.9655,
+ "step": 1090
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019178445788290915,
+ "loss": 1.5858,
+ "step": 1091
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019176946307499312,
+ "loss": 1.8359,
+ "step": 1092
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917544551827534,
+ "loss": 1.4354,
+ "step": 1093
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019173943420832984,
+ "loss": 1.4312,
+ "step": 1094
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917244001538641,
+ "loss": 2.0024,
+ "step": 1095
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019170935302149965,
+ "loss": 1.5994,
+ "step": 1096
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019169429281338195,
+ "loss": 2.05,
+ "step": 1097
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019167921953165825,
+ "loss": 1.8746,
+ "step": 1098
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019166413317847763,
+ "loss": 2.0071,
+ "step": 1099
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019164903375599112,
+ "loss": 2.0331,
+ "step": 1100
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019163392126635154,
+ "loss": 1.3587,
+ "step": 1101
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019161879571171362,
+ "loss": 1.6144,
+ "step": 1102
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019160365709423388,
+ "loss": 1.4845,
+ "step": 1103
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019158850541607083,
+ "loss": 1.4511,
+ "step": 1104
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019157334067938474,
+ "loss": 1.8015,
+ "step": 1105
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019155816288633776,
+ "loss": 1.5029,
+ "step": 1106
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019154297203909394,
+ "loss": 1.7102,
+ "step": 1107
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019152776813981912,
+ "loss": 1.6661,
+ "step": 1108
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001915125511906811,
+ "loss": 1.5872,
+ "step": 1109
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019149732119384943,
+ "loss": 1.7868,
+ "step": 1110
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914820781514956,
+ "loss": 1.6365,
+ "step": 1111
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914668220657929,
+ "loss": 2.3434,
+ "step": 1112
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914515529389166,
+ "loss": 1.6458,
+ "step": 1113
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914362707730437,
+ "loss": 1.7061,
+ "step": 1114
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019142097557035308,
+ "loss": 1.8606,
+ "step": 1115
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019140566733302552,
+ "loss": 1.9415,
+ "step": 1116
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019139034606324362,
+ "loss": 1.7411,
+ "step": 1117
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019137501176319193,
+ "loss": 1.9404,
+ "step": 1118
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913596644350567,
+ "loss": 1.802,
+ "step": 1119
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019134430408102615,
+ "loss": 1.2244,
+ "step": 1120
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019132893070329036,
+ "loss": 1.902,
+ "step": 1121
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913135443040412,
+ "loss": 1.4578,
+ "step": 1122
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019129814488547247,
+ "loss": 1.6816,
+ "step": 1123
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001912827324497798,
+ "loss": 1.7293,
+ "step": 1124
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019126730699916061,
+ "loss": 1.6344,
+ "step": 1125
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.0001912518685358143,
+ "loss": 1.6819,
+ "step": 1126
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019123641706194199,
+ "loss": 1.6761,
+ "step": 1127
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019122095257974677,
+ "loss": 1.9222,
+ "step": 1128
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019120547509143354,
+ "loss": 1.6117,
+ "step": 1129
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019118998459920902,
+ "loss": 1.688,
+ "step": 1130
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019117448110528184,
+ "loss": 1.8383,
+ "step": 1131
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019115896461186245,
+ "loss": 1.5225,
+ "step": 1132
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019114343512116318,
+ "loss": 2.0376,
+ "step": 1133
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019112789263539813,
+ "loss": 1.5632,
+ "step": 1134
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019111233715678343,
+ "loss": 1.7049,
+ "step": 1135
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910967686875369,
+ "loss": 1.4992,
+ "step": 1136
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019108118722987826,
+ "loss": 1.7949,
+ "step": 1137
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019106559278602903,
+ "loss": 1.4688,
+ "step": 1138
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019104998535821274,
+ "loss": 1.4031,
+ "step": 1139
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910343649486546,
+ "loss": 2.1757,
+ "step": 1140
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019101873155958179,
+ "loss": 1.622,
+ "step": 1141
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019100308519322322,
+ "loss": 1.9441,
+ "step": 1142
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.0001909874258518098,
+ "loss": 1.8065,
+ "step": 1143
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019097175353757417,
+ "loss": 1.8348,
+ "step": 1144
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019095606825275083,
+ "loss": 2.0519,
+ "step": 1145
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019094036999957624,
+ "loss": 1.9172,
+ "step": 1146
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019092465878028854,
+ "loss": 1.9961,
+ "step": 1147
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019090893459712787,
+ "loss": 2.1239,
+ "step": 1148
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019089319745233611,
+ "loss": 1.3481,
+ "step": 1149
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019087744734815708,
+ "loss": 1.5035,
+ "step": 1150
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019086168428683638,
+ "loss": 1.818,
+ "step": 1151
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019084590827062145,
+ "loss": 2.0481,
+ "step": 1152
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019083011930176165,
+ "loss": 1.4444,
+ "step": 1153
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019081431738250814,
+ "loss": 1.6059,
+ "step": 1154
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907985025151139,
+ "loss": 2.0284,
+ "step": 1155
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907826747018338,
+ "loss": 1.8603,
+ "step": 1156
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019076683394492455,
+ "loss": 1.7189,
+ "step": 1157
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019075098024664468,
+ "loss": 1.7497,
+ "step": 1158
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019073511360925458,
+ "loss": 1.7489,
+ "step": 1159
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001907192340350165,
+ "loss": 1.6059,
+ "step": 1160
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019070334152619453,
+ "loss": 1.4407,
+ "step": 1161
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019068743608505455,
+ "loss": 1.7025,
+ "step": 1162
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019067151771386438,
+ "loss": 1.7921,
+ "step": 1163
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906555864148936,
+ "loss": 1.6147,
+ "step": 1164
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906396421904137,
+ "loss": 1.6192,
+ "step": 1165
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019062368504269795,
+ "loss": 1.4341,
+ "step": 1166
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019060771497402147,
+ "loss": 1.3054,
+ "step": 1167
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001905917319866613,
+ "loss": 2.041,
+ "step": 1168
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019057573608289623,
+ "loss": 2.004,
+ "step": 1169
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019055972726500695,
+ "loss": 1.4002,
+ "step": 1170
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019054370553527595,
+ "loss": 1.5554,
+ "step": 1171
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019052767089598754,
+ "loss": 1.9783,
+ "step": 1172
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.000190511623349428,
+ "loss": 1.7443,
+ "step": 1173
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019049556289788528,
+ "loss": 1.6089,
+ "step": 1174
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001904794895436493,
+ "loss": 1.8784,
+ "step": 1175
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904634032890117,
+ "loss": 2.0985,
+ "step": 1176
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904473041362661,
+ "loss": 1.811,
+ "step": 1177
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019043119208770793,
+ "loss": 1.407,
+ "step": 1178
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904150671456343,
+ "loss": 1.7269,
+ "step": 1179
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019039892931234435,
+ "loss": 1.8374,
+ "step": 1180
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019038277859013896,
+ "loss": 1.583,
+ "step": 1181
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019036661498132086,
+ "loss": 1.6407,
+ "step": 1182
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019035043848819464,
+ "loss": 2.0828,
+ "step": 1183
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019033424911306672,
+ "loss": 1.7067,
+ "step": 1184
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019031804685824534,
+ "loss": 1.55,
+ "step": 1185
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001903018317260406,
+ "loss": 1.7573,
+ "step": 1186
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019028560371876446,
+ "loss": 1.5666,
+ "step": 1187
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001902693628387306,
+ "loss": 1.5192,
+ "step": 1188
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019025310908825466,
+ "loss": 2.0093,
+ "step": 1189
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019023684246965406,
+ "loss": 1.8414,
+ "step": 1190
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019022056298524808,
+ "loss": 1.3696,
+ "step": 1191
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019020427063735782,
+ "loss": 1.6336,
+ "step": 1192
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019018796542830617,
+ "loss": 1.8528,
+ "step": 1193
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019017164736041795,
+ "loss": 2.0523,
+ "step": 1194
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019015531643601973,
+ "loss": 1.7526,
+ "step": 1195
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019013897265743998,
+ "loss": 1.8391,
+ "step": 1196
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019012261602700892,
+ "loss": 1.4257,
+ "step": 1197
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019010624654705867,
+ "loss": 2.0911,
+ "step": 1198
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.0001900898642199232,
+ "loss": 1.7578,
+ "step": 1199
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019007346904793818,
+ "loss": 1.9003,
+ "step": 1200
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900570610334413,
+ "loss": 1.3918,
+ "step": 1201
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900406401787719,
+ "loss": 2.0365,
+ "step": 1202
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019002420648627131,
+ "loss": 1.5184,
+ "step": 1203
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019000775995828254,
+ "loss": 1.6412,
+ "step": 1204
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018999130059715058,
+ "loss": 1.5031,
+ "step": 1205
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018997482840522217,
+ "loss": 1.4421,
+ "step": 1206
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018995834338484584,
+ "loss": 1.9431,
+ "step": 1207
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.000189941845538372,
+ "loss": 1.8141,
+ "step": 1208
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.0001899253348681529,
+ "loss": 1.7289,
+ "step": 1209
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018990881137654258,
+ "loss": 1.7217,
+ "step": 1210
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.000189892275065897,
+ "loss": 2.3727,
+ "step": 1211
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018987572593857381,
+ "loss": 1.4833,
+ "step": 1212
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018985916399693256,
+ "loss": 2.13,
+ "step": 1213
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018984258924333464,
+ "loss": 1.875,
+ "step": 1214
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018982600168014323,
+ "loss": 1.783,
+ "step": 1215
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018980940130972337,
+ "loss": 1.6815,
+ "step": 1216
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897927881344419,
+ "loss": 2.049,
+ "step": 1217
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018977616215666752,
+ "loss": 1.918,
+ "step": 1218
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897595233787707,
+ "loss": 1.5824,
+ "step": 1219
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018974287180312377,
+ "loss": 1.7473,
+ "step": 1220
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018972620743210093,
+ "loss": 1.6915,
+ "step": 1221
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897095302680781,
+ "loss": 1.7633,
+ "step": 1222
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018969284031343308,
+ "loss": 1.6921,
+ "step": 1223
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018967613757054554,
+ "loss": 1.5433,
+ "step": 1224
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018965942204179686,
+ "loss": 1.9389,
+ "step": 1225
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018964269372957038,
+ "loss": 1.5625,
+ "step": 1226
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018962595263625115,
+ "loss": 1.4835,
+ "step": 1227
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018960919876422611,
+ "loss": 1.8479,
+ "step": 1228
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018959243211588397,
+ "loss": 1.7861,
+ "step": 1229
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018957565269361531,
+ "loss": 1.867,
+ "step": 1230
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018955886049981245,
+ "loss": 1.9383,
+ "step": 1231
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.0001895420555368697,
+ "loss": 1.755,
+ "step": 1232
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.000189525237807183,
+ "loss": 1.5166,
+ "step": 1233
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018950840731315024,
+ "loss": 1.8629,
+ "step": 1234
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.000189491564057171,
+ "loss": 1.6845,
+ "step": 1235
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018947470804164685,
+ "loss": 1.4748,
+ "step": 1236
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018945783926898105,
+ "loss": 1.8907,
+ "step": 1237
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018944095774157873,
+ "loss": 1.5758,
+ "step": 1238
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018942406346184683,
+ "loss": 1.6367,
+ "step": 1239
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018940715643219407,
+ "loss": 1.7285,
+ "step": 1240
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018939023665503108,
+ "loss": 1.5714,
+ "step": 1241
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001893733041327702,
+ "loss": 1.9308,
+ "step": 1242
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018935635886782568,
+ "loss": 1.9153,
+ "step": 1243
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018933940086261351,
+ "loss": 1.8009,
+ "step": 1244
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018932243011955154,
+ "loss": 1.7392,
+ "step": 1245
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018930544664105944,
+ "loss": 1.821,
+ "step": 1246
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001892884504295587,
+ "loss": 1.475,
+ "step": 1247
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018927144148747255,
+ "loss": 1.8937,
+ "step": 1248
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018925441981722618,
+ "loss": 1.6958,
+ "step": 1249
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018923738542124644,
+ "loss": 1.6836,
+ "step": 1250
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018922033830196208,
+ "loss": 2.0213,
+ "step": 1251
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018920327846180365,
+ "loss": 1.9572,
+ "step": 1252
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018918620590320352,
+ "loss": 1.9449,
+ "step": 1253
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018916912062859583,
+ "loss": 1.7297,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "eval_loss": 2.0551259517669678,
+ "eval_runtime": 283.8338,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018915202264041664,
+ "loss": 1.8158,
+ "step": 1255
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891349119411037,
+ "loss": 1.921,
+ "step": 1256
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018911778853309658,
+ "loss": 1.5726,
+ "step": 1257
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891006524188368,
+ "loss": 1.6641,
+ "step": 1258
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018908350360076752,
+ "loss": 1.5841,
+ "step": 1259
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018906634208133385,
+ "loss": 1.8567,
+ "step": 1260
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018904916786298257,
+ "loss": 1.5584,
+ "step": 1261
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018903198094816242,
+ "loss": 1.6615,
+ "step": 1262
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018901478133932385,
+ "loss": 1.7477,
+ "step": 1263
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018899756903891914,
+ "loss": 1.3796,
+ "step": 1264
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018898034404940238,
+ "loss": 1.7991,
+ "step": 1265
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018896310637322953,
+ "loss": 1.4944,
+ "step": 1266
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018894585601285827,
+ "loss": 1.5719,
+ "step": 1267
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018892859297074812,
+ "loss": 1.5495,
+ "step": 1268
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018891131724936043,
+ "loss": 1.7611,
+ "step": 1269
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018889402885115833,
+ "loss": 1.5991,
+ "step": 1270
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018887672777860676,
+ "loss": 1.8849,
+ "step": 1271
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888594140341725,
+ "loss": 1.6136,
+ "step": 1272
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888420876203241,
+ "loss": 1.8288,
+ "step": 1273
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888247485395319,
+ "loss": 1.6625,
+ "step": 1274
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018880739679426816,
+ "loss": 1.49,
+ "step": 1275
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018879003238700675,
+ "loss": 1.874,
+ "step": 1276
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018877265532022352,
+ "loss": 1.751,
+ "step": 1277
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018875526559639604,
+ "loss": 1.9882,
+ "step": 1278
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018873786321800374,
+ "loss": 1.5214,
+ "step": 1279
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001887204481875278,
+ "loss": 1.741,
+ "step": 1280
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018870302050745118,
+ "loss": 1.7798,
+ "step": 1281
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018868558018025878,
+ "loss": 1.9258,
+ "step": 1282
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001886681272084371,
+ "loss": 1.9096,
+ "step": 1283
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018865066159447466,
+ "loss": 1.6729,
+ "step": 1284
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018863318334086157,
+ "loss": 1.6239,
+ "step": 1285
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018861569245008994,
+ "loss": 1.9857,
+ "step": 1286
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018859818892465354,
+ "loss": 1.9905,
+ "step": 1287
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188580672767048,
+ "loss": 2.0073,
+ "step": 1288
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018856314397977075,
+ "loss": 1.7109,
+ "step": 1289
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188545602565321,
+ "loss": 1.3727,
+ "step": 1290
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018852804852619975,
+ "loss": 1.7045,
+ "step": 1291
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018851048186490992,
+ "loss": 1.9042,
+ "step": 1292
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018849290258395602,
+ "loss": 1.7174,
+ "step": 1293
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018847531068584452,
+ "loss": 1.6502,
+ "step": 1294
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018845770617308366,
+ "loss": 1.8582,
+ "step": 1295
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001884400890481834,
+ "loss": 1.4846,
+ "step": 1296
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018842245931365562,
+ "loss": 1.5428,
+ "step": 1297
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018840481697201392,
+ "loss": 1.7266,
+ "step": 1298
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001883871620257737,
+ "loss": 1.9324,
+ "step": 1299
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018836949447745215,
+ "loss": 1.577,
+ "step": 1300
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001883518143295683,
+ "loss": 1.6388,
+ "step": 1301
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018833412158464298,
+ "loss": 1.9201,
+ "step": 1302
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018831641624519877,
+ "loss": 1.6478,
+ "step": 1303
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018829869831376005,
+ "loss": 1.6826,
+ "step": 1304
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018828096779285303,
+ "loss": 1.8513,
+ "step": 1305
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018826322468500566,
+ "loss": 1.571,
+ "step": 1306
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018824546899274777,
+ "loss": 1.1602,
+ "step": 1307
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001882277007186109,
+ "loss": 1.9998,
+ "step": 1308
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001882099198651284,
+ "loss": 1.7034,
+ "step": 1309
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001881921264348355,
+ "loss": 1.4031,
+ "step": 1310
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018817432043026911,
+ "loss": 1.8413,
+ "step": 1311
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018815650185396797,
+ "loss": 1.6606,
+ "step": 1312
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018813867070847264,
+ "loss": 1.5792,
+ "step": 1313
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018812082699632546,
+ "loss": 1.4525,
+ "step": 1314
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018810297072007054,
+ "loss": 1.4906,
+ "step": 1315
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018808510188225377,
+ "loss": 1.6284,
+ "step": 1316
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880672204854229,
+ "loss": 1.7281,
+ "step": 1317
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880493265321274,
+ "loss": 1.5345,
+ "step": 1318
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018803142002491856,
+ "loss": 2.0933,
+ "step": 1319
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018801350096634946,
+ "loss": 1.9372,
+ "step": 1320
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.000187995569358975,
+ "loss": 1.7151,
+ "step": 1321
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018797762520535177,
+ "loss": 1.4823,
+ "step": 1322
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001879596685080383,
+ "loss": 2.0495,
+ "step": 1323
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018794169926959474,
+ "loss": 2.2966,
+ "step": 1324
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018792371749258314,
+ "loss": 1.7868,
+ "step": 1325
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018790572317956735,
+ "loss": 1.9403,
+ "step": 1326
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018788771633311292,
+ "loss": 1.6687,
+ "step": 1327
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018786969695578723,
+ "loss": 1.8422,
+ "step": 1328
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018785166505015948,
+ "loss": 1.5916,
+ "step": 1329
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018783362061880062,
+ "loss": 1.9119,
+ "step": 1330
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018781556366428336,
+ "loss": 1.4903,
+ "step": 1331
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018779749418918227,
+ "loss": 1.9497,
+ "step": 1332
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018777941219607364,
+ "loss": 1.9462,
+ "step": 1333
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018776131768753556,
+ "loss": 2.0474,
+ "step": 1334
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018774321066614795,
+ "loss": 1.4474,
+ "step": 1335
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018772509113449245,
+ "loss": 1.8315,
+ "step": 1336
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018770695909515247,
+ "loss": 1.7684,
+ "step": 1337
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018768881455071332,
+ "loss": 1.2675,
+ "step": 1338
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.000187670657503762,
+ "loss": 1.8226,
+ "step": 1339
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018765248795688726,
+ "loss": 2.2112,
+ "step": 1340
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.0001876343059126797,
+ "loss": 1.3627,
+ "step": 1341
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018761611137373173,
+ "loss": 2.1488,
+ "step": 1342
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018759790434263744,
+ "loss": 1.9842,
+ "step": 1343
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018757968482199276,
+ "loss": 1.9775,
+ "step": 1344
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018756145281439545,
+ "loss": 1.6835,
+ "step": 1345
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875432083224449,
+ "loss": 1.5272,
+ "step": 1346
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875249513487425,
+ "loss": 1.7539,
+ "step": 1347
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018750668189589117,
+ "loss": 1.874,
+ "step": 1348
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018748839996649583,
+ "loss": 1.5858,
+ "step": 1349
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018747010556316305,
+ "loss": 1.9298,
+ "step": 1350
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001874517986885012,
+ "loss": 1.5079,
+ "step": 1351
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018743347934512046,
+ "loss": 1.884,
+ "step": 1352
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018741514753563277,
+ "loss": 1.7978,
+ "step": 1353
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873968032626518,
+ "loss": 1.7735,
+ "step": 1354
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018737844652879312,
+ "loss": 1.7227,
+ "step": 1355
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018736007733667393,
+ "loss": 1.8458,
+ "step": 1356
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018734169568891334,
+ "loss": 1.3268,
+ "step": 1357
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873233015881321,
+ "loss": 1.3782,
+ "step": 1358
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018730489503695287,
+ "loss": 1.9614,
+ "step": 1359
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018728647603800003,
+ "loss": 1.7755,
+ "step": 1360
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018726804459389963,
+ "loss": 1.7961,
+ "step": 1361
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018724960070727972,
+ "loss": 1.7158,
+ "step": 1362
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872311443807699,
+ "loss": 1.6303,
+ "step": 1363
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872126756170017,
+ "loss": 1.8734,
+ "step": 1364
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018719419441860834,
+ "loss": 1.5143,
+ "step": 1365
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001871757007882248,
+ "loss": 1.498,
+ "step": 1366
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001871571947284879,
+ "loss": 1.0886,
+ "step": 1367
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018713867624203621,
+ "loss": 1.6633,
+ "step": 1368
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018712014533151008,
+ "loss": 1.8895,
+ "step": 1369
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018710160199955156,
+ "loss": 1.4178,
+ "step": 1370
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018708304624880456,
+ "loss": 1.6814,
+ "step": 1371
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001870644780819147,
+ "loss": 1.8671,
+ "step": 1372
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018704589750152944,
+ "loss": 1.4786,
+ "step": 1373
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018702730451029796,
+ "loss": 1.8622,
+ "step": 1374
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018700869911087115,
+ "loss": 1.8891,
+ "step": 1375
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869900813059018,
+ "loss": 2.0493,
+ "step": 1376
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018697145109804436,
+ "loss": 1.7238,
+ "step": 1377
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018695280848995513,
+ "loss": 1.7826,
+ "step": 1378
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869341534842921,
+ "loss": 1.8557,
+ "step": 1379
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869154860837151,
+ "loss": 1.7492,
+ "step": 1380
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868968062908857,
+ "loss": 1.7441,
+ "step": 1381
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868781141084672,
+ "loss": 1.8322,
+ "step": 1382
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868594095391247,
+ "loss": 1.8177,
+ "step": 1383
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018684069258552508,
+ "loss": 2.0001,
+ "step": 1384
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018682196325033696,
+ "loss": 1.5046,
+ "step": 1385
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018680322153623075,
+ "loss": 1.6789,
+ "step": 1386
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867844674458786,
+ "loss": 1.6951,
+ "step": 1387
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018676570098195443,
+ "loss": 2.0334,
+ "step": 1388
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018674692214713388,
+ "loss": 1.7833,
+ "step": 1389
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867281309440945,
+ "loss": 1.82,
+ "step": 1390
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018670932737551547,
+ "loss": 1.8155,
+ "step": 1391
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018669051144407775,
+ "loss": 1.7912,
+ "step": 1392
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018667168315246406,
+ "loss": 1.5816,
+ "step": 1393
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018665284250335895,
+ "loss": 1.7521,
+ "step": 1394
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018663398949944865,
+ "loss": 1.4287,
+ "step": 1395
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018661512414342127,
+ "loss": 1.6026,
+ "step": 1396
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018659624643796647,
+ "loss": 1.6953,
+ "step": 1397
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018657735638577587,
+ "loss": 1.8515,
+ "step": 1398
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018655845398954276,
+ "loss": 2.0384,
+ "step": 1399
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018653953925196225,
+ "loss": 1.5458,
+ "step": 1400
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018652061217573114,
+ "loss": 1.7166,
+ "step": 1401
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.000186501672763548,
+ "loss": 1.5653,
+ "step": 1402
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018648272101811318,
+ "loss": 2.0928,
+ "step": 1403
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018646375694212884,
+ "loss": 1.605,
+ "step": 1404
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018644478053829878,
+ "loss": 1.4734,
+ "step": 1405
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018642579180932865,
+ "loss": 2.0578,
+ "step": 1406
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018640679075792582,
+ "loss": 1.9823,
+ "step": 1407
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018638777738679943,
+ "loss": 2.0551,
+ "step": 1408
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018636875169866036,
+ "loss": 1.6315,
+ "step": 1409
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863497136962213,
+ "loss": 1.8965,
+ "step": 1410
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863306633821966,
+ "loss": 1.3584,
+ "step": 1411
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018631160075930245,
+ "loss": 1.9673,
+ "step": 1412
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018629252583025676,
+ "loss": 1.5277,
+ "step": 1413
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001862734385977792,
+ "loss": 1.6788,
+ "step": 1414
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018625433906459116,
+ "loss": 1.432,
+ "step": 1415
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018623522723341588,
+ "loss": 1.8102,
+ "step": 1416
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018621610310697823,
+ "loss": 1.6713,
+ "step": 1417
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018619696668800492,
+ "loss": 1.6989,
+ "step": 1418
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001861778179792244,
+ "loss": 1.7645,
+ "step": 1419
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018615865698336684,
+ "loss": 1.594,
+ "step": 1420
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018613948370316415,
+ "loss": 1.8751,
+ "step": 1421
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018612029814135014,
+ "loss": 1.64,
+ "step": 1422
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018610110030066007,
+ "loss": 1.5066,
+ "step": 1423
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001860818901838313,
+ "loss": 1.9817,
+ "step": 1424
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018606266779360266,
+ "loss": 2.056,
+ "step": 1425
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860434331327149,
+ "loss": 1.6997,
+ "step": 1426
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018602418620391044,
+ "loss": 1.5573,
+ "step": 1427
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860049270099335,
+ "loss": 1.8427,
+ "step": 1428
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018598565555353,
+ "loss": 2.012,
+ "step": 1429
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018596637183744763,
+ "loss": 1.7976,
+ "step": 1430
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018594707586443585,
+ "loss": 1.4,
+ "step": 1431
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001859277676372458,
+ "loss": 1.8717,
+ "step": 1432
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018590844715863045,
+ "loss": 1.4311,
+ "step": 1433
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018588911443134448,
+ "loss": 1.5903,
+ "step": 1434
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018586976945814425,
+ "loss": 2.0898,
+ "step": 1435
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018585041224178803,
+ "loss": 1.5302,
+ "step": 1436
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018583104278503568,
+ "loss": 1.9582,
+ "step": 1437
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018581166109064886,
+ "loss": 1.5264,
+ "step": 1438
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018579226716139096,
+ "loss": 1.6551,
+ "step": 1439
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018577286100002723,
+ "loss": 1.7774,
+ "step": 1440
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018575344260932444,
+ "loss": 1.8316,
+ "step": 1441
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001857340119920513,
+ "loss": 1.3916,
+ "step": 1442
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018571456915097818,
+ "loss": 1.6728,
+ "step": 1443
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856951140888772,
+ "loss": 1.7247,
+ "step": 1444
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018567564680852224,
+ "loss": 1.4539,
+ "step": 1445
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018565616731268888,
+ "loss": 1.613,
+ "step": 1446
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856366756041545,
+ "loss": 1.757,
+ "step": 1447
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018561717168569816,
+ "loss": 1.6903,
+ "step": 1448
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018559765556010072,
+ "loss": 1.7322,
+ "step": 1449
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018557812723014476,
+ "loss": 1.5627,
+ "step": 1450
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018555858669861458,
+ "loss": 1.8751,
+ "step": 1451
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018553903396829625,
+ "loss": 1.2721,
+ "step": 1452
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018551946904197752,
+ "loss": 1.8167,
+ "step": 1453
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018549989192244797,
+ "loss": 1.6602,
+ "step": 1454
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018548030261249885,
+ "loss": 1.9053,
+ "step": 1455
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018546070111492315,
+ "loss": 1.7721,
+ "step": 1456
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018544108743251566,
+ "loss": 2.1421,
+ "step": 1457
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018542146156807284,
+ "loss": 1.5076,
+ "step": 1458
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018540182352439288,
+ "loss": 1.9039,
+ "step": 1459
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018538217330427582,
+ "loss": 1.9777,
+ "step": 1460
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018536251091052323,
+ "loss": 1.5702,
+ "step": 1461
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018534283634593862,
+ "loss": 1.851,
+ "step": 1462
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018532314961332717,
+ "loss": 1.5337,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "eval_loss": 2.068387508392334,
+ "eval_runtime": 283.4638,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018530345071549574,
+ "loss": 1.7553,
+ "step": 1464
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018528373965525296,
+ "loss": 1.4175,
+ "step": 1465
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018526401643540922,
+ "loss": 1.7216,
+ "step": 1466
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018524428105877664,
+ "loss": 1.6415,
+ "step": 1467
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018522453352816896,
+ "loss": 1.7284,
+ "step": 1468
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018520477384640187,
+ "loss": 1.8314,
+ "step": 1469
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018518500201629258,
+ "loss": 1.8341,
+ "step": 1470
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018516521804066015,
+ "loss": 1.4129,
+ "step": 1471
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018514542192232537,
+ "loss": 1.4671,
+ "step": 1472
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018512561366411067,
+ "loss": 1.6665,
+ "step": 1473
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018510579326884034,
+ "loss": 1.5722,
+ "step": 1474
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850859607393403,
+ "loss": 1.9348,
+ "step": 1475
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850661160784383,
+ "loss": 1.5404,
+ "step": 1476
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018504625928896363,
+ "loss": 1.4769,
+ "step": 1477
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018502639037374757,
+ "loss": 1.4149,
+ "step": 1478
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850065093356229,
+ "loss": 1.958,
+ "step": 1479
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018498661617742426,
+ "loss": 1.8319,
+ "step": 1480
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018496671090198797,
+ "loss": 1.5948,
+ "step": 1481
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001849467935121521,
+ "loss": 1.8469,
+ "step": 1482
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018492686401075644,
+ "loss": 1.6798,
+ "step": 1483
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001849069224006425,
+ "loss": 1.8197,
+ "step": 1484
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848869686846535,
+ "loss": 1.6613,
+ "step": 1485
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848670028656344,
+ "loss": 1.7322,
+ "step": 1486
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018484702494643188,
+ "loss": 2.0493,
+ "step": 1487
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018482703492989444,
+ "loss": 1.7182,
+ "step": 1488
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018480703281887215,
+ "loss": 1.689,
+ "step": 1489
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018478701861621686,
+ "loss": 1.9477,
+ "step": 1490
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001847669923247822,
+ "loss": 1.8171,
+ "step": 1491
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018474695394742345,
+ "loss": 1.7337,
+ "step": 1492
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847269034869977,
+ "loss": 1.6983,
+ "step": 1493
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847068409463636,
+ "loss": 1.6445,
+ "step": 1494
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846867663283818,
+ "loss": 1.9965,
+ "step": 1495
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846666796359143,
+ "loss": 1.6775,
+ "step": 1496
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846465808718252,
+ "loss": 1.8117,
+ "step": 1497
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018462647003898006,
+ "loss": 1.8803,
+ "step": 1498
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018460634714024624,
+ "loss": 1.3045,
+ "step": 1499
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018458621217849286,
+ "loss": 1.7768,
+ "step": 1500
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018456606515659073,
+ "loss": 2.0641,
+ "step": 1501
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001845459060774123,
+ "loss": 1.3804,
+ "step": 1502
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018452573494383192,
+ "loss": 1.6271,
+ "step": 1503
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018450555175872547,
+ "loss": 1.8525,
+ "step": 1504
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018448535652497073,
+ "loss": 1.5303,
+ "step": 1505
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.000184465149245447,
+ "loss": 2.0368,
+ "step": 1506
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018444492992303544,
+ "loss": 1.9951,
+ "step": 1507
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001844246985606189,
+ "loss": 1.8715,
+ "step": 1508
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018440445516108186,
+ "loss": 1.7373,
+ "step": 1509
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018438419972731067,
+ "loss": 1.7667,
+ "step": 1510
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018436393226219327,
+ "loss": 1.5134,
+ "step": 1511
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018434365276861938,
+ "loss": 1.3891,
+ "step": 1512
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001843233612494804,
+ "loss": 1.7066,
+ "step": 1513
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018430305770766948,
+ "loss": 1.6366,
+ "step": 1514
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842827421460814,
+ "loss": 1.7838,
+ "step": 1515
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842624145676128,
+ "loss": 1.7884,
+ "step": 1516
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001842420749751619,
+ "loss": 1.8428,
+ "step": 1517
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018422172337162867,
+ "loss": 1.4987,
+ "step": 1518
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018420135975991483,
+ "loss": 1.7576,
+ "step": 1519
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001841809841429238,
+ "loss": 1.8522,
+ "step": 1520
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018416059652356066,
+ "loss": 1.9308,
+ "step": 1521
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018414019690473227,
+ "loss": 1.4658,
+ "step": 1522
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018411978528934717,
+ "loss": 1.7072,
+ "step": 1523
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001840993616803156,
+ "loss": 1.736,
+ "step": 1524
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840789260805495,
+ "loss": 1.7712,
+ "step": 1525
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840584784929626,
+ "loss": 1.2231,
+ "step": 1526
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018403801892047023,
+ "loss": 1.8421,
+ "step": 1527
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018401754736598947,
+ "loss": 1.2689,
+ "step": 1528
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018399706383243918,
+ "loss": 1.8062,
+ "step": 1529
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839765683227398,
+ "loss": 1.6846,
+ "step": 1530
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839560608398136,
+ "loss": 1.8201,
+ "step": 1531
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018393554138658441,
+ "loss": 1.6958,
+ "step": 1532
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018391500996597796,
+ "loss": 1.8487,
+ "step": 1533
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.0001838944665809215,
+ "loss": 1.9788,
+ "step": 1534
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018387391123434412,
+ "loss": 1.6002,
+ "step": 1535
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018385334392917658,
+ "loss": 1.3859,
+ "step": 1536
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018383276466835127,
+ "loss": 2.0743,
+ "step": 1537
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018381217345480235,
+ "loss": 1.8357,
+ "step": 1538
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018379157029146573,
+ "loss": 1.7002,
+ "step": 1539
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018377095518127897,
+ "loss": 1.3058,
+ "step": 1540
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018375032812718124,
+ "loss": 1.8745,
+ "step": 1541
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018372968913211364,
+ "loss": 1.7847,
+ "step": 1542
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018370903819901874,
+ "loss": 1.8156,
+ "step": 1543
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018368837533084095,
+ "loss": 2.0152,
+ "step": 1544
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018366770053052634,
+ "loss": 1.5656,
+ "step": 1545
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018364701380102266,
+ "loss": 1.5753,
+ "step": 1546
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018362631514527947,
+ "loss": 1.3938,
+ "step": 1547
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018360560456624788,
+ "loss": 1.9599,
+ "step": 1548
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018358488206688075,
+ "loss": 1.8641,
+ "step": 1549
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018356414765013267,
+ "loss": 1.8428,
+ "step": 1550
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018354340131895998,
+ "loss": 1.6016,
+ "step": 1551
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018352264307632056,
+ "loss": 1.5768,
+ "step": 1552
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018350187292517415,
+ "loss": 1.5369,
+ "step": 1553
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.0001834810908684821,
+ "loss": 1.9717,
+ "step": 1554
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018346029690920746,
+ "loss": 1.943,
+ "step": 1555
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018343949105031505,
+ "loss": 1.8166,
+ "step": 1556
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018341867329477125,
+ "loss": 1.7149,
+ "step": 1557
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018339784364554426,
+ "loss": 1.4657,
+ "step": 1558
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018337700210560397,
+ "loss": 1.8693,
+ "step": 1559
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018335614867792183,
+ "loss": 1.7656,
+ "step": 1560
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001833352833654712,
+ "loss": 1.5123,
+ "step": 1561
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018331440617122696,
+ "loss": 1.7884,
+ "step": 1562
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832935170981657,
+ "loss": 1.7309,
+ "step": 1563
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018327261614926583,
+ "loss": 1.9628,
+ "step": 1564
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018325170332750732,
+ "loss": 1.6409,
+ "step": 1565
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832307786358719,
+ "loss": 1.6093,
+ "step": 1566
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018320984207734298,
+ "loss": 1.6111,
+ "step": 1567
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018318889365490565,
+ "loss": 2.0085,
+ "step": 1568
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018316793337154664,
+ "loss": 2.079,
+ "step": 1569
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018314696123025454,
+ "loss": 1.5466,
+ "step": 1570
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018312597723401942,
+ "loss": 2.0825,
+ "step": 1571
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.0001831049813858332,
+ "loss": 1.9748,
+ "step": 1572
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018308397368868945,
+ "loss": 1.6529,
+ "step": 1573
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018306295414558335,
+ "loss": 1.7119,
+ "step": 1574
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018304192275951184,
+ "loss": 1.8812,
+ "step": 1575
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018302087953347352,
+ "loss": 1.8676,
+ "step": 1576
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018299982447046877,
+ "loss": 1.879,
+ "step": 1577
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018297875757349952,
+ "loss": 1.6282,
+ "step": 1578
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018295767884556947,
+ "loss": 1.735,
+ "step": 1579
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018293658828968397,
+ "loss": 1.5796,
+ "step": 1580
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018291548590885007,
+ "loss": 1.8258,
+ "step": 1581
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018289437170607658,
+ "loss": 1.7531,
+ "step": 1582
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018287324568437381,
+ "loss": 1.6265,
+ "step": 1583
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018285210784675394,
+ "loss": 1.7997,
+ "step": 1584
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018283095819623078,
+ "loss": 1.955,
+ "step": 1585
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018280979673581977,
+ "loss": 1.6542,
+ "step": 1586
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018278862346853808,
+ "loss": 1.7634,
+ "step": 1587
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018276743839740458,
+ "loss": 2.0077,
+ "step": 1588
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018274624152543977,
+ "loss": 2.0254,
+ "step": 1589
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018272503285566587,
+ "loss": 1.4464,
+ "step": 1590
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018270381239110677,
+ "loss": 1.8643,
+ "step": 1591
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018268258013478804,
+ "loss": 1.3278,
+ "step": 1592
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018266133608973696,
+ "loss": 1.744,
+ "step": 1593
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018264008025898248,
+ "loss": 1.5079,
+ "step": 1594
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018261881264555516,
+ "loss": 1.9655,
+ "step": 1595
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.0001825975332524873,
+ "loss": 2.0557,
+ "step": 1596
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.000182576242082813,
+ "loss": 1.7174,
+ "step": 1597
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018255493913956774,
+ "loss": 1.449,
+ "step": 1598
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018253362442578896,
+ "loss": 1.9058,
+ "step": 1599
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018251229794451567,
+ "loss": 1.3482,
+ "step": 1600
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018249095969878853,
+ "loss": 1.7906,
+ "step": 1601
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018246960969164994,
+ "loss": 1.6177,
+ "step": 1602
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018244824792614393,
+ "loss": 1.5786,
+ "step": 1603
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018242687440531618,
+ "loss": 1.6451,
+ "step": 1604
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018240548913221416,
+ "loss": 1.3695,
+ "step": 1605
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.0001823840921098869,
+ "loss": 1.6648,
+ "step": 1606
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018236268334138515,
+ "loss": 2.1548,
+ "step": 1607
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018234126282976133,
+ "loss": 1.6153,
+ "step": 1608
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001823198305780696,
+ "loss": 1.741,
+ "step": 1609
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018229838658936564,
+ "loss": 1.7827,
+ "step": 1610
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018227693086670697,
+ "loss": 1.7343,
+ "step": 1611
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018225546341315261,
+ "loss": 1.8149,
+ "step": 1612
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001822339842317635,
+ "loss": 1.5497,
+ "step": 1613
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018221249332560198,
+ "loss": 1.7659,
+ "step": 1614
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001821909906977322,
+ "loss": 1.8992,
+ "step": 1615
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018216947635122,
+ "loss": 1.8682,
+ "step": 1616
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018214795028913288,
+ "loss": 1.9774,
+ "step": 1617
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001821264125145399,
+ "loss": 1.9441,
+ "step": 1618
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018210486303051195,
+ "loss": 2.0314,
+ "step": 1619
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001820833018401215,
+ "loss": 1.8234,
+ "step": 1620
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018206172894644272,
+ "loss": 1.9478,
+ "step": 1621
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018204014435255135,
+ "loss": 1.7894,
+ "step": 1622
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.000182018548061525,
+ "loss": 1.5469,
+ "step": 1623
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018199694007644277,
+ "loss": 1.9419,
+ "step": 1624
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018197532040038547,
+ "loss": 1.6686,
+ "step": 1625
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018195368903643563,
+ "loss": 2.2525,
+ "step": 1626
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018193204598767744,
+ "loss": 1.8076,
+ "step": 1627
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018191039125719662,
+ "loss": 1.976,
+ "step": 1628
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018188872484808076,
+ "loss": 1.6896,
+ "step": 1629
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018186704676341898,
+ "loss": 1.6784,
+ "step": 1630
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018184535700630213,
+ "loss": 1.9634,
+ "step": 1631
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018182365557982264,
+ "loss": 1.7406,
+ "step": 1632
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018180194248707473,
+ "loss": 1.7492,
+ "step": 1633
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018178021773115414,
+ "loss": 1.7731,
+ "step": 1634
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018175848131515837,
+ "loss": 1.6232,
+ "step": 1635
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817367332421866,
+ "loss": 1.7488,
+ "step": 1636
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817149735153396,
+ "loss": 1.3398,
+ "step": 1637
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018169320213771983,
+ "loss": 1.4521,
+ "step": 1638
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018167141911243145,
+ "loss": 1.6311,
+ "step": 1639
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018164962444258014,
+ "loss": 1.8911,
+ "step": 1640
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018162781813127346,
+ "loss": 1.9879,
+ "step": 1641
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001816060001816205,
+ "loss": 1.5637,
+ "step": 1642
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018158417059673196,
+ "loss": 1.7461,
+ "step": 1643
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815623293797203,
+ "loss": 1.6671,
+ "step": 1644
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815404765336996,
+ "loss": 1.2124,
+ "step": 1645
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815186120617856,
+ "loss": 1.6402,
+ "step": 1646
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001814967359670957,
+ "loss": 1.8837,
+ "step": 1647
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018147484825274893,
+ "loss": 1.8027,
+ "step": 1648
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018145294892186605,
+ "loss": 1.7684,
+ "step": 1649
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.0001814310379775694,
+ "loss": 1.8274,
+ "step": 1650
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.000181409115422983,
+ "loss": 1.8292,
+ "step": 1651
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018138718126123248,
+ "loss": 1.3492,
+ "step": 1652
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018136523549544523,
+ "loss": 1.509,
+ "step": 1653
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018134327812875024,
+ "loss": 1.7415,
+ "step": 1654
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018132130916427816,
+ "loss": 1.5223,
+ "step": 1655
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018129932860516126,
+ "loss": 1.9294,
+ "step": 1656
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018127733645453348,
+ "loss": 2.0716,
+ "step": 1657
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018125533271553043,
+ "loss": 1.57,
+ "step": 1658
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018123331739128938,
+ "loss": 2.2132,
+ "step": 1659
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018121129048494922,
+ "loss": 1.9006,
+ "step": 1660
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018118925199965048,
+ "loss": 1.9319,
+ "step": 1661
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018116720193853543,
+ "loss": 1.8103,
+ "step": 1662
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018114514030474787,
+ "loss": 1.7028,
+ "step": 1663
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018112306710143334,
+ "loss": 1.802,
+ "step": 1664
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.000181100982331739,
+ "loss": 1.6835,
+ "step": 1665
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.0001810788859988136,
+ "loss": 1.7223,
+ "step": 1666
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810567781058077,
+ "loss": 1.5829,
+ "step": 1667
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018103465865587333,
+ "loss": 1.9863,
+ "step": 1668
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810125276521642,
+ "loss": 1.6398,
+ "step": 1669
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018099038509783582,
+ "loss": 1.9261,
+ "step": 1670
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018096823099604517,
+ "loss": 1.8882,
+ "step": 1671
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018094606534995093,
+ "loss": 1.6716,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "eval_loss": 2.075261354446411,
+ "eval_runtime": 283.9438,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018092388816271345,
+ "loss": 1.6688,
+ "step": 1673
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018090169943749476,
+ "loss": 1.9127,
+ "step": 1674
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001808794991774584,
+ "loss": 1.7214,
+ "step": 1675
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018085728738576973,
+ "loss": 1.785,
+ "step": 1676
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018083506406559561,
+ "loss": 1.5287,
+ "step": 1677
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018081282922010464,
+ "loss": 1.9012,
+ "step": 1678
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018079058285246698,
+ "loss": 1.3094,
+ "step": 1679
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807683249658545,
+ "loss": 1.818,
+ "step": 1680
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807460555634407,
+ "loss": 1.9389,
+ "step": 1681
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807237746484007,
+ "loss": 1.4334,
+ "step": 1682
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018070148222391126,
+ "loss": 1.5422,
+ "step": 1683
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806791782931508,
+ "loss": 1.7899,
+ "step": 1684
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806568628592994,
+ "loss": 1.6106,
+ "step": 1685
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018063453592553872,
+ "loss": 1.9807,
+ "step": 1686
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806121974950521,
+ "loss": 1.1762,
+ "step": 1687
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018058984757102456,
+ "loss": 1.8338,
+ "step": 1688
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001805674861566426,
+ "loss": 1.5556,
+ "step": 1689
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805451132550946,
+ "loss": 0.87,
+ "step": 1690
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018052272886957038,
+ "loss": 1.0386,
+ "step": 1691
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805003330032615,
+ "loss": 0.8153,
+ "step": 1692
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018047792565936102,
+ "loss": 1.1745,
+ "step": 1693
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018045550684106388,
+ "loss": 1.1584,
+ "step": 1694
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018043307655156644,
+ "loss": 1.0742,
+ "step": 1695
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018041063479406675,
+ "loss": 1.0537,
+ "step": 1696
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803881815717646,
+ "loss": 1.0239,
+ "step": 1697
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803657168878612,
+ "loss": 0.9182,
+ "step": 1698
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018034324074555965,
+ "loss": 1.1856,
+ "step": 1699
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018032075314806448,
+ "loss": 1.3285,
+ "step": 1700
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018029825409858198,
+ "loss": 1.2912,
+ "step": 1701
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018027574360032,
+ "loss": 1.3666,
+ "step": 1702
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018025322165648807,
+ "loss": 0.9621,
+ "step": 1703
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018023068827029723,
+ "loss": 0.8484,
+ "step": 1704
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018020814344496037,
+ "loss": 1.2236,
+ "step": 1705
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018018558718369186,
+ "loss": 0.8155,
+ "step": 1706
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.0001801630194897077,
+ "loss": 1.2047,
+ "step": 1707
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018014044036622555,
+ "loss": 1.0269,
+ "step": 1708
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018011784981646474,
+ "loss": 1.0536,
+ "step": 1709
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018009524784364615,
+ "loss": 1.0516,
+ "step": 1710
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018007263445099235,
+ "loss": 0.9087,
+ "step": 1711
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001800500096417275,
+ "loss": 1.3057,
+ "step": 1712
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018002737341907743,
+ "loss": 0.8791,
+ "step": 1713
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018000472578626956,
+ "loss": 1.1667,
+ "step": 1714
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017998206674653294,
+ "loss": 1.1026,
+ "step": 1715
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017995939630309826,
+ "loss": 1.3228,
+ "step": 1716
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001799367144591978,
+ "loss": 0.9173,
+ "step": 1717
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017991402121806557,
+ "loss": 1.0067,
+ "step": 1718
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798913165829371,
+ "loss": 1.0256,
+ "step": 1719
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017986860055704953,
+ "loss": 0.7645,
+ "step": 1720
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798458731436417,
+ "loss": 1.0567,
+ "step": 1721
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017982313434595406,
+ "loss": 0.7465,
+ "step": 1722
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017980038416722863,
+ "loss": 1.3268,
+ "step": 1723
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017977762261070916,
+ "loss": 0.9917,
+ "step": 1724
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017975484967964087,
+ "loss": 0.8592,
+ "step": 1725
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017973206537727073,
+ "loss": 1.43,
+ "step": 1726
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017970926970684725,
+ "loss": 1.3679,
+ "step": 1727
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017968646267162063,
+ "loss": 1.2959,
+ "step": 1728
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017966364427484267,
+ "loss": 1.0674,
+ "step": 1729
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017964081451976672,
+ "loss": 1.1153,
+ "step": 1730
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017961797340964783,
+ "loss": 1.0586,
+ "step": 1731
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017959512094774266,
+ "loss": 1.2388,
+ "step": 1732
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017957225713730949,
+ "loss": 1.257,
+ "step": 1733
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.0001795493819816081,
+ "loss": 1.099,
+ "step": 1734
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001795264954839001,
+ "loss": 0.9532,
+ "step": 1735
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017950359764744859,
+ "loss": 1.2553,
+ "step": 1736
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017948068847551825,
+ "loss": 0.9973,
+ "step": 1737
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017945776797137543,
+ "loss": 1.0637,
+ "step": 1738
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017943483613828815,
+ "loss": 1.1815,
+ "step": 1739
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017941189297952597,
+ "loss": 0.8378,
+ "step": 1740
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017938893849836002,
+ "loss": 0.9375,
+ "step": 1741
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017936597269806322,
+ "loss": 0.9653,
+ "step": 1742
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001793429955819099,
+ "loss": 1.221,
+ "step": 1743
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017932000715317612,
+ "loss": 1.041,
+ "step": 1744
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017929700741513955,
+ "loss": 1.0724,
+ "step": 1745
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017927399637107945,
+ "loss": 1.1102,
+ "step": 1746
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017925097402427667,
+ "loss": 0.8542,
+ "step": 1747
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001792279403780137,
+ "loss": 1.2339,
+ "step": 1748
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017920489543557465,
+ "loss": 0.8671,
+ "step": 1749
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791818392002452,
+ "loss": 0.9779,
+ "step": 1750
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791587716753127,
+ "loss": 1.1242,
+ "step": 1751
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017913569286406603,
+ "loss": 0.9043,
+ "step": 1752
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001791126027697958,
+ "loss": 0.7996,
+ "step": 1753
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017908950139579406,
+ "loss": 0.8602,
+ "step": 1754
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017906638874535462,
+ "loss": 1.0161,
+ "step": 1755
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017904326482177284,
+ "loss": 0.8226,
+ "step": 1756
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017902012962834566,
+ "loss": 1.3885,
+ "step": 1757
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001789969831683717,
+ "loss": 1.2158,
+ "step": 1758
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017897382544515108,
+ "loss": 1.3261,
+ "step": 1759
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017895065646198567,
+ "loss": 1.2144,
+ "step": 1760
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017892747622217875,
+ "loss": 0.9881,
+ "step": 1761
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001789042847290354,
+ "loss": 1.0342,
+ "step": 1762
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017888108198586217,
+ "loss": 0.7883,
+ "step": 1763
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017885786799596732,
+ "loss": 0.9006,
+ "step": 1764
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017883464276266064,
+ "loss": 1.3695,
+ "step": 1765
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001788114062892535,
+ "loss": 1.0303,
+ "step": 1766
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017878815857905897,
+ "loss": 1.3816,
+ "step": 1767
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001787648996353916,
+ "loss": 0.8684,
+ "step": 1768
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017874162946156772,
+ "loss": 1.1157,
+ "step": 1769
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017871834806090501,
+ "loss": 1.0087,
+ "step": 1770
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.000178695055436723,
+ "loss": 0.7173,
+ "step": 1771
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017867175159234265,
+ "loss": 1.4784,
+ "step": 1772
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017864843653108662,
+ "loss": 1.1401,
+ "step": 1773
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786251102562791,
+ "loss": 1.0952,
+ "step": 1774
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786017727712459,
+ "loss": 0.9443,
+ "step": 1775
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017857842407931445,
+ "loss": 1.0682,
+ "step": 1776
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785550641838138,
+ "loss": 0.9402,
+ "step": 1777
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017853169308807448,
+ "loss": 1.0576,
+ "step": 1778
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785083107954288,
+ "loss": 1.1425,
+ "step": 1779
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017848491730921046,
+ "loss": 1.1402,
+ "step": 1780
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017846151263275494,
+ "loss": 1.4482,
+ "step": 1781
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017843809676939922,
+ "loss": 0.7765,
+ "step": 1782
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017841466972248188,
+ "loss": 1.1478,
+ "step": 1783
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001783912314953431,
+ "loss": 1.1876,
+ "step": 1784
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017836778209132464,
+ "loss": 1.2036,
+ "step": 1785
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783443215137699,
+ "loss": 1.0297,
+ "step": 1786
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783208497660239,
+ "loss": 0.8186,
+ "step": 1787
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017829736685143308,
+ "loss": 0.7258,
+ "step": 1788
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017827387277334568,
+ "loss": 0.8072,
+ "step": 1789
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017825036753511144,
+ "loss": 1.0474,
+ "step": 1790
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017822685114008167,
+ "loss": 1.2141,
+ "step": 1791
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017820332359160928,
+ "loss": 1.1443,
+ "step": 1792
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001781797848930488,
+ "loss": 0.9864,
+ "step": 1793
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017815623504775636,
+ "loss": 1.2998,
+ "step": 1794
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001781326740590896,
+ "loss": 1.0672,
+ "step": 1795
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017810910193040785,
+ "loss": 0.9152,
+ "step": 1796
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.000178085518665072,
+ "loss": 1.2555,
+ "step": 1797
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017806192426644444,
+ "loss": 1.2085,
+ "step": 1798
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017803831873788926,
+ "loss": 1.6205,
+ "step": 1799
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001780147020827721,
+ "loss": 1.3382,
+ "step": 1800
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017799107430446016,
+ "loss": 1.3309,
+ "step": 1801
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017796743540632223,
+ "loss": 1.2556,
+ "step": 1802
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017794378539172877,
+ "loss": 0.829,
+ "step": 1803
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017792012426405166,
+ "loss": 1.1711,
+ "step": 1804
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017789645202666456,
+ "loss": 1.0128,
+ "step": 1805
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017787276868294253,
+ "loss": 1.2074,
+ "step": 1806
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017784907423626237,
+ "loss": 1.0996,
+ "step": 1807
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778253686900023,
+ "loss": 0.9608,
+ "step": 1808
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778016520475423,
+ "loss": 0.827,
+ "step": 1809
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017777792431226383,
+ "loss": 1.2365,
+ "step": 1810
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017775418548754993,
+ "loss": 1.0276,
+ "step": 1811
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777304355767852,
+ "loss": 0.8178,
+ "step": 1812
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777066745833559,
+ "loss": 1.1297,
+ "step": 1813
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017768290251064987,
+ "loss": 1.1737,
+ "step": 1814
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017765911936205644,
+ "loss": 1.1606,
+ "step": 1815
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017763532514096658,
+ "loss": 1.2605,
+ "step": 1816
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001776115198507728,
+ "loss": 1.2271,
+ "step": 1817
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017758770349486923,
+ "loss": 0.9407,
+ "step": 1818
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001775638760766516,
+ "loss": 1.0273,
+ "step": 1819
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017754003759951715,
+ "loss": 1.0746,
+ "step": 1820
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017751618806686472,
+ "loss": 1.0091,
+ "step": 1821
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017749232748209473,
+ "loss": 0.997,
+ "step": 1822
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001774684558486092,
+ "loss": 1.4814,
+ "step": 1823
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017744457316981168,
+ "loss": 1.1407,
+ "step": 1824
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017742067944910737,
+ "loss": 0.9824,
+ "step": 1825
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017739677468990293,
+ "loss": 1.2603,
+ "step": 1826
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017737285889560668,
+ "loss": 1.3721,
+ "step": 1827
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017734893206962853,
+ "loss": 1.1186,
+ "step": 1828
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017732499421537984,
+ "loss": 0.7693,
+ "step": 1829
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001773010453362737,
+ "loss": 1.0449,
+ "step": 1830
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017727708543572467,
+ "loss": 0.9331,
+ "step": 1831
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001772531145171489,
+ "loss": 0.739,
+ "step": 1832
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017722913258396417,
+ "loss": 0.9076,
+ "step": 1833
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017720513963958968,
+ "loss": 1.3464,
+ "step": 1834
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017718113568744638,
+ "loss": 0.8858,
+ "step": 1835
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017715712073095672,
+ "loss": 1.3204,
+ "step": 1836
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017713309477354467,
+ "loss": 1.0538,
+ "step": 1837
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001771090578186358,
+ "loss": 1.44,
+ "step": 1838
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770850098696573,
+ "loss": 1.0167,
+ "step": 1839
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017706095093003785,
+ "loss": 0.9724,
+ "step": 1840
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017703688100320774,
+ "loss": 0.8055,
+ "step": 1841
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770128000925988,
+ "loss": 0.7363,
+ "step": 1842
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017698870820164446,
+ "loss": 1.1329,
+ "step": 1843
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017696460533377968,
+ "loss": 0.9487,
+ "step": 1844
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017694049149244104,
+ "loss": 1.2571,
+ "step": 1845
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001769163666810666,
+ "loss": 0.9148,
+ "step": 1846
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017689223090309607,
+ "loss": 1.4676,
+ "step": 1847
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017686808416197072,
+ "loss": 0.9395,
+ "step": 1848
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017684392646113325,
+ "loss": 0.9632,
+ "step": 1849
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017681975780402807,
+ "loss": 1.0037,
+ "step": 1850
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001767955781941011,
+ "loss": 0.9557,
+ "step": 1851
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017677138763479985,
+ "loss": 1.2799,
+ "step": 1852
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017674718612957336,
+ "loss": 0.8461,
+ "step": 1853
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001767229736818722,
+ "loss": 1.2762,
+ "step": 1854
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017669875029514856,
+ "loss": 1.4801,
+ "step": 1855
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017667451597285617,
+ "loss": 0.9849,
+ "step": 1856
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766502707184503,
+ "loss": 1.0875,
+ "step": 1857
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017662601453538783,
+ "loss": 0.8346,
+ "step": 1858
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766017474271271,
+ "loss": 1.1933,
+ "step": 1859
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017657746939712815,
+ "loss": 0.8789,
+ "step": 1860
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017655318044885245,
+ "loss": 1.0091,
+ "step": 1861
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0001765288805857631,
+ "loss": 0.7371,
+ "step": 1862
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017650456981132466,
+ "loss": 0.8131,
+ "step": 1863
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017648024812900342,
+ "loss": 1.0795,
+ "step": 1864
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.000176455915542267,
+ "loss": 0.9882,
+ "step": 1865
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017643157205458483,
+ "loss": 1.212,
+ "step": 1866
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017640721766942768,
+ "loss": 1.4755,
+ "step": 1867
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017638285239026798,
+ "loss": 1.0391,
+ "step": 1868
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017635847622057965,
+ "loss": 1.2568,
+ "step": 1869
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017633408916383826,
+ "loss": 1.2138,
+ "step": 1870
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001763096912235208,
+ "loss": 1.196,
+ "step": 1871
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017628528240310596,
+ "loss": 1.1476,
+ "step": 1872
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017626086270607384,
+ "loss": 1.1421,
+ "step": 1873
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017623643213590619,
+ "loss": 1.0711,
+ "step": 1874
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001762119906960863,
+ "loss": 0.8842,
+ "step": 1875
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017618753839009893,
+ "loss": 0.798,
+ "step": 1876
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761630752214305,
+ "loss": 0.8591,
+ "step": 1877
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017613860119356883,
+ "loss": 0.7646,
+ "step": 1878
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761141163100035,
+ "loss": 1.4113,
+ "step": 1879
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017608962057422549,
+ "loss": 0.8605,
+ "step": 1880
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017606511398972731,
+ "loss": 0.6179,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "eval_loss": 2.3971996307373047,
+ "eval_runtime": 283.7444,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760405965600031,
+ "loss": 0.8651,
+ "step": 1882
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760160682885485,
+ "loss": 1.3178,
+ "step": 1883
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017599152917886071,
+ "loss": 0.9233,
+ "step": 1884
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017596697923443847,
+ "loss": 0.9126,
+ "step": 1885
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001759424184587821,
+ "loss": 0.9749,
+ "step": 1886
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017591784685539334,
+ "loss": 1.1929,
+ "step": 1887
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017589326442777565,
+ "loss": 1.2026,
+ "step": 1888
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017586867117943392,
+ "loss": 1.1162,
+ "step": 1889
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017584406711387463,
+ "loss": 0.9818,
+ "step": 1890
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001758194522346057,
+ "loss": 0.9802,
+ "step": 1891
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001757948265451368,
+ "loss": 0.8963,
+ "step": 1892
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017577019004897897,
+ "loss": 1.0359,
+ "step": 1893
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017574554274964478,
+ "loss": 1.0788,
+ "step": 1894
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017572088465064848,
+ "loss": 0.9415,
+ "step": 1895
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756962157555057,
+ "loss": 1.0944,
+ "step": 1896
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017567153606773373,
+ "loss": 1.357,
+ "step": 1897
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017564684559085136,
+ "loss": 1.0108,
+ "step": 1898
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756221443283789,
+ "loss": 0.5337,
+ "step": 1899
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755974322838382,
+ "loss": 1.4234,
+ "step": 1900
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755727094607527,
+ "loss": 0.9083,
+ "step": 1901
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017554797586264727,
+ "loss": 0.9199,
+ "step": 1902
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017552323149304844,
+ "loss": 1.1885,
+ "step": 1903
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754984763554842,
+ "loss": 1.276,
+ "step": 1904
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754737104534841,
+ "loss": 0.8882,
+ "step": 1905
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017544893379057918,
+ "loss": 0.993,
+ "step": 1906
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754241463703021,
+ "loss": 1.261,
+ "step": 1907
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017539934819618696,
+ "loss": 0.9877,
+ "step": 1908
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017537453927176947,
+ "loss": 0.9991,
+ "step": 1909
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017534971960058685,
+ "loss": 1.2012,
+ "step": 1910
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001753248891861778,
+ "loss": 0.864,
+ "step": 1911
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017530004803208263,
+ "loss": 1.0382,
+ "step": 1912
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017527519614184316,
+ "loss": 1.068,
+ "step": 1913
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017525033351900268,
+ "loss": 0.8687,
+ "step": 1914
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752254601671061,
+ "loss": 1.1174,
+ "step": 1915
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752005760896998,
+ "loss": 1.269,
+ "step": 1916
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751756812903317,
+ "loss": 0.7387,
+ "step": 1917
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751507757725513,
+ "loss": 0.8484,
+ "step": 1918
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001751258595399095,
+ "loss": 1.0092,
+ "step": 1919
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017510093259595885,
+ "loss": 1.0145,
+ "step": 1920
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017507599494425344,
+ "loss": 1.2969,
+ "step": 1921
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017505104658834875,
+ "loss": 0.7925,
+ "step": 1922
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017502608753180196,
+ "loss": 0.8974,
+ "step": 1923
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017500111777817164,
+ "loss": 0.764,
+ "step": 1924
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001749761373310179,
+ "loss": 1.1057,
+ "step": 1925
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017495114619390246,
+ "loss": 0.8092,
+ "step": 1926
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017492614437038845,
+ "loss": 0.9553,
+ "step": 1927
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017490113186404067,
+ "loss": 1.0278,
+ "step": 1928
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748761086784253,
+ "loss": 1.2152,
+ "step": 1929
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017485107481711012,
+ "loss": 1.5154,
+ "step": 1930
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748260302836644,
+ "loss": 1.1973,
+ "step": 1931
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017480097508165896,
+ "loss": 0.9429,
+ "step": 1932
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747759092146661,
+ "loss": 1.5453,
+ "step": 1933
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747508326862597,
+ "loss": 1.1691,
+ "step": 1934
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017472574550001508,
+ "loss": 1.2094,
+ "step": 1935
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017470064765950918,
+ "loss": 1.0777,
+ "step": 1936
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017467553916832035,
+ "loss": 1.0883,
+ "step": 1937
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017465042003002857,
+ "loss": 0.9297,
+ "step": 1938
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017462529024821522,
+ "loss": 0.7814,
+ "step": 1939
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017460014982646334,
+ "loss": 1.3645,
+ "step": 1940
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.0001745749987683573,
+ "loss": 1.0604,
+ "step": 1941
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017454983707748317,
+ "loss": 0.9416,
+ "step": 1942
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017452466475742845,
+ "loss": 1.4187,
+ "step": 1943
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017449948181178215,
+ "loss": 1.1619,
+ "step": 1944
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017447428824413482,
+ "loss": 1.1381,
+ "step": 1945
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017444908405807845,
+ "loss": 1.2304,
+ "step": 1946
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.0001744238692572067,
+ "loss": 1.2149,
+ "step": 1947
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017439864384511463,
+ "loss": 0.8172,
+ "step": 1948
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017437340782539877,
+ "loss": 1.0783,
+ "step": 1949
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017434816120165728,
+ "loss": 1.0661,
+ "step": 1950
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017432290397748982,
+ "loss": 1.1959,
+ "step": 1951
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001742976361564974,
+ "loss": 1.0581,
+ "step": 1952
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017427235774228274,
+ "loss": 0.8948,
+ "step": 1953
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017424706873845,
+ "loss": 1.2565,
+ "step": 1954
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017422176914860476,
+ "loss": 0.9237,
+ "step": 1955
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017419645897635432,
+ "loss": 1.219,
+ "step": 1956
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017417113822530727,
+ "loss": 1.4606,
+ "step": 1957
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017414580689907377,
+ "loss": 0.714,
+ "step": 1958
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001741204650012656,
+ "loss": 1.2223,
+ "step": 1959
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017409511253549593,
+ "loss": 0.9828,
+ "step": 1960
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017406974950537942,
+ "loss": 0.9954,
+ "step": 1961
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017404437591453235,
+ "loss": 1.0307,
+ "step": 1962
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001740189917665724,
+ "loss": 0.9331,
+ "step": 1963
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739935970651188,
+ "loss": 1.3517,
+ "step": 1964
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017396819181379232,
+ "loss": 1.2024,
+ "step": 1965
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739427760162151,
+ "loss": 0.9696,
+ "step": 1966
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017391734967601102,
+ "loss": 1.1559,
+ "step": 1967
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001738919127968052,
+ "loss": 1.3104,
+ "step": 1968
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017386646538222443,
+ "loss": 0.9073,
+ "step": 1969
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017384100743589697,
+ "loss": 1.0539,
+ "step": 1970
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017381553896145255,
+ "loss": 0.9873,
+ "step": 1971
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737900599625224,
+ "loss": 0.9466,
+ "step": 1972
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737645704427393,
+ "loss": 1.0639,
+ "step": 1973
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737390704057375,
+ "loss": 0.5843,
+ "step": 1974
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017371355985515275,
+ "loss": 1.1318,
+ "step": 1975
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017368803879462227,
+ "loss": 1.0116,
+ "step": 1976
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001736625072277848,
+ "loss": 0.8845,
+ "step": 1977
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017363696515828062,
+ "loss": 0.8081,
+ "step": 1978
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017361141258975148,
+ "loss": 0.8795,
+ "step": 1979
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735858495258406,
+ "loss": 0.9725,
+ "step": 1980
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735602759701927,
+ "loss": 1.0164,
+ "step": 1981
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017353469192645405,
+ "loss": 1.2937,
+ "step": 1982
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735090973982723,
+ "loss": 1.0842,
+ "step": 1983
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017348349238929678,
+ "loss": 1.0043,
+ "step": 1984
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017345787690317815,
+ "loss": 1.1302,
+ "step": 1985
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017343225094356855,
+ "loss": 1.195,
+ "step": 1986
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017340661451412183,
+ "loss": 1.1449,
+ "step": 1987
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017338096761849309,
+ "loss": 1.2244,
+ "step": 1988
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017335531026033897,
+ "loss": 0.9273,
+ "step": 1989
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017332964244331776,
+ "loss": 1.0448,
+ "step": 1990
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017330396417108908,
+ "loss": 1.0074,
+ "step": 1991
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017327827544731412,
+ "loss": 0.9284,
+ "step": 1992
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.0001732525762756555,
+ "loss": 1.0307,
+ "step": 1993
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017322686665977737,
+ "loss": 1.1526,
+ "step": 1994
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017320114660334535,
+ "loss": 0.819,
+ "step": 1995
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017317541611002656,
+ "loss": 1.1029,
+ "step": 1996
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017314967518348962,
+ "loss": 1.2471,
+ "step": 1997
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017312392382740462,
+ "loss": 1.0156,
+ "step": 1998
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017309816204544317,
+ "loss": 1.1843,
+ "step": 1999
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017307238984127832,
+ "loss": 1.1588,
+ "step": 2000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017304660721858457,
+ "loss": 1.0157,
+ "step": 2001
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.000173020814181038,
+ "loss": 1.0563,
+ "step": 2002
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017299501073231622,
+ "loss": 1.1883,
+ "step": 2003
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017296919687609808,
+ "loss": 0.9404,
+ "step": 2004
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017294337261606417,
+ "loss": 1.2495,
+ "step": 2005
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017291753795589643,
+ "loss": 1.0074,
+ "step": 2006
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017289169289927837,
+ "loss": 1.1411,
+ "step": 2007
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017286583744989488,
+ "loss": 0.9942,
+ "step": 2008
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017283997161143239,
+ "loss": 0.952,
+ "step": 2009
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017281409538757883,
+ "loss": 1.2966,
+ "step": 2010
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017278820878202357,
+ "loss": 1.0836,
+ "step": 2011
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727623117984575,
+ "loss": 1.0984,
+ "step": 2012
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727364044405729,
+ "loss": 0.8822,
+ "step": 2013
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017271048671206366,
+ "loss": 1.2014,
+ "step": 2014
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017268455861662503,
+ "loss": 1.1779,
+ "step": 2015
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017265862015795384,
+ "loss": 0.9966,
+ "step": 2016
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017263267133974832,
+ "loss": 0.9536,
+ "step": 2017
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017260671216570822,
+ "loss": 0.811,
+ "step": 2018
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017258074263953472,
+ "loss": 0.8241,
+ "step": 2019
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017255476276493056,
+ "loss": 1.1263,
+ "step": 2020
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017252877254559986,
+ "loss": 0.995,
+ "step": 2021
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001725027719852483,
+ "loss": 1.1481,
+ "step": 2022
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001724767610875829,
+ "loss": 1.129,
+ "step": 2023
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017245073985631238,
+ "loss": 0.5928,
+ "step": 2024
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017242470829514672,
+ "loss": 0.8326,
+ "step": 2025
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017239866640779745,
+ "loss": 1.1092,
+ "step": 2026
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017237261419797756,
+ "loss": 1.5015,
+ "step": 2027
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001723465516694016,
+ "loss": 0.9775,
+ "step": 2028
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017232047882578548,
+ "loss": 0.9348,
+ "step": 2029
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001722943956708466,
+ "loss": 0.6199,
+ "step": 2030
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017226830220830384,
+ "loss": 1.1485,
+ "step": 2031
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017224219844187764,
+ "loss": 1.1195,
+ "step": 2032
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017221608437528973,
+ "loss": 1.0528,
+ "step": 2033
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017218996001226345,
+ "loss": 1.1058,
+ "step": 2034
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017216382535652355,
+ "loss": 1.1451,
+ "step": 2035
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001721376804117963,
+ "loss": 1.2251,
+ "step": 2036
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017211152518180936,
+ "loss": 1.0708,
+ "step": 2037
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017208535967029188,
+ "loss": 1.0746,
+ "step": 2038
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017205918388097456,
+ "loss": 1.3262,
+ "step": 2039
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017203299781758943,
+ "loss": 0.7619,
+ "step": 2040
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017200680148387007,
+ "loss": 1.01,
+ "step": 2041
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001719805948835515,
+ "loss": 1.1651,
+ "step": 2042
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017195437802037026,
+ "loss": 1.4671,
+ "step": 2043
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017192815089806424,
+ "loss": 0.9857,
+ "step": 2044
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001719019135203729,
+ "loss": 1.2613,
+ "step": 2045
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017187566589103704,
+ "loss": 1.4386,
+ "step": 2046
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001718494080137991,
+ "loss": 1.0965,
+ "step": 2047
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017182313989240285,
+ "loss": 0.752,
+ "step": 2048
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017179686153059352,
+ "loss": 0.9126,
+ "step": 2049
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017177057293211784,
+ "loss": 1.5075,
+ "step": 2050
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.000171744274100724,
+ "loss": 1.0407,
+ "step": 2051
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017171796504016166,
+ "loss": 0.8263,
+ "step": 2052
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.0001716916457541819,
+ "loss": 0.9453,
+ "step": 2053
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017166531624653722,
+ "loss": 0.9777,
+ "step": 2054
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017163897652098172,
+ "loss": 1.2129,
+ "step": 2055
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017161262658127086,
+ "loss": 1.3642,
+ "step": 2056
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017158626643116152,
+ "loss": 0.6798,
+ "step": 2057
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017155989607441213,
+ "loss": 0.874,
+ "step": 2058
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017153351551478247,
+ "loss": 1.0636,
+ "step": 2059
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001715071247560339,
+ "loss": 1.0563,
+ "step": 2060
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714807238019291,
+ "loss": 1.1984,
+ "step": 2061
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017145431265623234,
+ "loss": 0.9444,
+ "step": 2062
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714278913227092,
+ "loss": 0.7809,
+ "step": 2063
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017140145980512684,
+ "loss": 1.649,
+ "step": 2064
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713750181072538,
+ "loss": 1.0956,
+ "step": 2065
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713485662328601,
+ "loss": 1.2845,
+ "step": 2066
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017132210418571714,
+ "loss": 1.0484,
+ "step": 2067
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017129563196959793,
+ "loss": 1.0291,
+ "step": 2068
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017126914958827679,
+ "loss": 1.1226,
+ "step": 2069
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.0001712426570455295,
+ "loss": 1.0119,
+ "step": 2070
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017121615434513332,
+ "loss": 1.1663,
+ "step": 2071
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.000171189641490867,
+ "loss": 1.1353,
+ "step": 2072
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017116311848651064,
+ "loss": 1.0761,
+ "step": 2073
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017113658533584594,
+ "loss": 1.1978,
+ "step": 2074
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017111004204265582,
+ "loss": 1.3881,
+ "step": 2075
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017108348861072484,
+ "loss": 1.3945,
+ "step": 2076
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017105692504383897,
+ "loss": 1.3796,
+ "step": 2077
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017103035134578555,
+ "loss": 1.1721,
+ "step": 2078
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001710037675203534,
+ "loss": 1.0061,
+ "step": 2079
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017097717357133284,
+ "loss": 1.2456,
+ "step": 2080
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017095056950251555,
+ "loss": 0.788,
+ "step": 2081
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001709239553176947,
+ "loss": 1.16,
+ "step": 2082
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001708973310206649,
+ "loss": 1.0498,
+ "step": 2083
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017087069661522218,
+ "loss": 0.8993,
+ "step": 2084
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017084405210516406,
+ "loss": 1.2088,
+ "step": 2085
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001708173974942894,
+ "loss": 1.0897,
+ "step": 2086
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017079073278639863,
+ "loss": 1.2718,
+ "step": 2087
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017076405798529355,
+ "loss": 1.2325,
+ "step": 2088
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017073737309477736,
+ "loss": 1.0555,
+ "step": 2089
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017071067811865476,
+ "loss": 1.1428,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 2.3191208839416504,
+ "eval_runtime": 284.1375,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706839730607319,
+ "loss": 1.0908,
+ "step": 2091
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706572579248163,
+ "loss": 1.2092,
+ "step": 2092
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.000170630532714717,
+ "loss": 1.1735,
+ "step": 2093
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001706037974342444,
+ "loss": 1.2716,
+ "step": 2094
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017057705208721035,
+ "loss": 1.0095,
+ "step": 2095
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001705502966774282,
+ "loss": 1.3059,
+ "step": 2096
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017052353120871266,
+ "loss": 0.8269,
+ "step": 2097
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001704967556848799,
+ "loss": 1.0615,
+ "step": 2098
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017046997010974755,
+ "loss": 1.2709,
+ "step": 2099
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017044317448713461,
+ "loss": 1.1633,
+ "step": 2100
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017041636882086158,
+ "loss": 0.9273,
+ "step": 2101
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017038955311475038,
+ "loss": 1.3117,
+ "step": 2102
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001703627273726243,
+ "loss": 0.8883,
+ "step": 2103
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017033589159830815,
+ "loss": 1.1371,
+ "step": 2104
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017030904579562806,
+ "loss": 1.5402,
+ "step": 2105
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017028218996841172,
+ "loss": 0.9156,
+ "step": 2106
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017025532412048817,
+ "loss": 1.0962,
+ "step": 2107
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001702284482556879,
+ "loss": 0.9402,
+ "step": 2108
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017020156237784279,
+ "loss": 0.8146,
+ "step": 2109
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001701746664907862,
+ "loss": 1.1718,
+ "step": 2110
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017014776059835288,
+ "loss": 1.0618,
+ "step": 2111
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017012084470437907,
+ "loss": 1.4796,
+ "step": 2112
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017009391881270237,
+ "loss": 0.8402,
+ "step": 2113
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017006698292716178,
+ "loss": 1.1641,
+ "step": 2114
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001700400370515978,
+ "loss": 1.241,
+ "step": 2115
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017001308118985237,
+ "loss": 0.8683,
+ "step": 2116
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00016998611534576873,
+ "loss": 1.2697,
+ "step": 2117
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016995913952319168,
+ "loss": 0.9233,
+ "step": 2118
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016993215372596737,
+ "loss": 1.2472,
+ "step": 2119
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016990515795794334,
+ "loss": 1.2541,
+ "step": 2120
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016987815222296865,
+ "loss": 1.0016,
+ "step": 2121
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016985113652489374,
+ "loss": 1.0678,
+ "step": 2122
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016982411086757037,
+ "loss": 1.6066,
+ "step": 2123
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016979707525485192,
+ "loss": 1.229,
+ "step": 2124
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016977002969059302,
+ "loss": 0.752,
+ "step": 2125
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016974297417864977,
+ "loss": 0.8752,
+ "step": 2126
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001697159087228797,
+ "loss": 0.8896,
+ "step": 2127
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016968883332714186,
+ "loss": 0.9657,
+ "step": 2128
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696617479952964,
+ "loss": 1.3657,
+ "step": 2129
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696346527312053,
+ "loss": 0.9876,
+ "step": 2130
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016960754753873162,
+ "loss": 1.0165,
+ "step": 2131
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016958043242174003,
+ "loss": 1.625,
+ "step": 2132
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016955330738409655,
+ "loss": 1.5502,
+ "step": 2133
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016952617242966864,
+ "loss": 1.0793,
+ "step": 2134
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016949902756232507,
+ "loss": 1.4425,
+ "step": 2135
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016947187278593622,
+ "loss": 1.3124,
+ "step": 2136
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016944470810437365,
+ "loss": 0.927,
+ "step": 2137
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016941753352151055,
+ "loss": 1.1911,
+ "step": 2138
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016939034904122138,
+ "loss": 1.0768,
+ "step": 2139
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016936315466738205,
+ "loss": 1.1277,
+ "step": 2140
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016933595040386984,
+ "loss": 0.812,
+ "step": 2141
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.0001693087362545636,
+ "loss": 0.8299,
+ "step": 2142
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016928151222334338,
+ "loss": 1.1125,
+ "step": 2143
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016925427831409077,
+ "loss": 1.1835,
+ "step": 2144
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016922703453068873,
+ "loss": 1.2007,
+ "step": 2145
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016919978087702163,
+ "loss": 0.8524,
+ "step": 2146
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016917251735697523,
+ "loss": 0.9497,
+ "step": 2147
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016914524397443673,
+ "loss": 1.1004,
+ "step": 2148
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016911796073329466,
+ "loss": 0.8347,
+ "step": 2149
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016909066763743912,
+ "loss": 0.9492,
+ "step": 2150
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016906336469076148,
+ "loss": 1.1406,
+ "step": 2151
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016903605189715447,
+ "loss": 1.0137,
+ "step": 2152
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001690087292605124,
+ "loss": 1.0624,
+ "step": 2153
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016898139678473076,
+ "loss": 1.1767,
+ "step": 2154
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001689540544737067,
+ "loss": 1.4184,
+ "step": 2155
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016892670233133856,
+ "loss": 0.957,
+ "step": 2156
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016889934036152618,
+ "loss": 1.0399,
+ "step": 2157
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016887196856817073,
+ "loss": 1.2009,
+ "step": 2158
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016884458695517495,
+ "loss": 1.3977,
+ "step": 2159
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016881719552644273,
+ "loss": 1.1328,
+ "step": 2160
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016878979428587955,
+ "loss": 1.5007,
+ "step": 2161
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016876238323739221,
+ "loss": 1.1248,
+ "step": 2162
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016873496238488899,
+ "loss": 1.0358,
+ "step": 2163
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016870753173227945,
+ "loss": 1.2961,
+ "step": 2164
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016868009128347459,
+ "loss": 0.9435,
+ "step": 2165
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016865264104238683,
+ "loss": 0.9642,
+ "step": 2166
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016862518101293,
+ "loss": 1.0169,
+ "step": 2167
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016859771119901929,
+ "loss": 1.0904,
+ "step": 2168
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.0001685702316045713,
+ "loss": 1.3178,
+ "step": 2169
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016854274223350397,
+ "loss": 1.1395,
+ "step": 2170
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016851524308973678,
+ "loss": 1.1207,
+ "step": 2171
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016848773417719044,
+ "loss": 1.3544,
+ "step": 2172
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016846021549978715,
+ "loss": 1.3503,
+ "step": 2173
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016843268706145042,
+ "loss": 1.4276,
+ "step": 2174
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016840514886610529,
+ "loss": 0.9888,
+ "step": 2175
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016837760091767802,
+ "loss": 1.0913,
+ "step": 2176
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001683500432200964,
+ "loss": 1.4781,
+ "step": 2177
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016832247577728955,
+ "loss": 1.2657,
+ "step": 2178
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.000168294898593188,
+ "loss": 0.9206,
+ "step": 2179
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682673116717236,
+ "loss": 0.9218,
+ "step": 2180
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682397150168297,
+ "loss": 1.2719,
+ "step": 2181
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016821210863244096,
+ "loss": 0.984,
+ "step": 2182
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016818449252249345,
+ "loss": 1.4641,
+ "step": 2183
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001681568666909246,
+ "loss": 1.2571,
+ "step": 2184
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016812923114167328,
+ "loss": 1.2025,
+ "step": 2185
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016810158587867973,
+ "loss": 0.9621,
+ "step": 2186
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016807393090588553,
+ "loss": 1.0016,
+ "step": 2187
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016804626622723368,
+ "loss": 1.031,
+ "step": 2188
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016801859184666857,
+ "loss": 0.7573,
+ "step": 2189
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016799090776813597,
+ "loss": 1.2694,
+ "step": 2190
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.000167963213995583,
+ "loss": 1.196,
+ "step": 2191
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016793551053295822,
+ "loss": 0.8754,
+ "step": 2192
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016790779738421152,
+ "loss": 1.1743,
+ "step": 2193
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678800745532942,
+ "loss": 1.0921,
+ "step": 2194
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016785234204415888,
+ "loss": 0.8778,
+ "step": 2195
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678245998607597,
+ "loss": 1.0528,
+ "step": 2196
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016779684800705203,
+ "loss": 1.0255,
+ "step": 2197
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001677690864869927,
+ "loss": 0.6344,
+ "step": 2198
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016774131530453992,
+ "loss": 0.8691,
+ "step": 2199
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016771353446365318,
+ "loss": 1.2061,
+ "step": 2200
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001676857439682935,
+ "loss": 1.1759,
+ "step": 2201
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016765794382242314,
+ "loss": 1.1118,
+ "step": 2202
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016763013403000584,
+ "loss": 1.3005,
+ "step": 2203
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016760231459500666,
+ "loss": 1.0415,
+ "step": 2204
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.000167574485521392,
+ "loss": 0.824,
+ "step": 2205
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016754664681312975,
+ "loss": 0.6682,
+ "step": 2206
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016751879847418905,
+ "loss": 1.9204,
+ "step": 2207
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016749094050854047,
+ "loss": 0.9931,
+ "step": 2208
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016746307292015602,
+ "loss": 0.8898,
+ "step": 2209
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016743519571300888,
+ "loss": 1.3337,
+ "step": 2210
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016740730889107383,
+ "loss": 1.2947,
+ "step": 2211
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673794124583269,
+ "loss": 1.1882,
+ "step": 2212
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673515064187455,
+ "loss": 1.5408,
+ "step": 2213
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016732359077630847,
+ "loss": 1.1273,
+ "step": 2214
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001672956655349959,
+ "loss": 0.8954,
+ "step": 2215
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016726773069878934,
+ "loss": 1.1747,
+ "step": 2216
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016723978627167173,
+ "loss": 0.807,
+ "step": 2217
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016721183225762727,
+ "loss": 1.2512,
+ "step": 2218
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016718386866064166,
+ "loss": 1.0796,
+ "step": 2219
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016715589548470185,
+ "loss": 1.0905,
+ "step": 2220
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016712791273379622,
+ "loss": 1.3779,
+ "step": 2221
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016709992041191452,
+ "loss": 1.2015,
+ "step": 2222
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016707191852304782,
+ "loss": 0.8612,
+ "step": 2223
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001670439070711886,
+ "loss": 1.1819,
+ "step": 2224
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016701588606033064,
+ "loss": 1.2715,
+ "step": 2225
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001669878554944692,
+ "loss": 1.3681,
+ "step": 2226
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016695981537760072,
+ "loss": 1.1254,
+ "step": 2227
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669317657137232,
+ "loss": 0.9476,
+ "step": 2228
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669037065068359,
+ "loss": 1.235,
+ "step": 2229
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016687563776093941,
+ "loss": 0.7356,
+ "step": 2230
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016684755948003573,
+ "loss": 0.7901,
+ "step": 2231
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016681947166812824,
+ "loss": 1.317,
+ "step": 2232
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016679137432922163,
+ "loss": 0.8832,
+ "step": 2233
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016676326746732195,
+ "loss": 1.2776,
+ "step": 2234
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016673515108643665,
+ "loss": 1.0435,
+ "step": 2235
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001667070251905745,
+ "loss": 1.0957,
+ "step": 2236
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016667888978374567,
+ "loss": 1.0862,
+ "step": 2237
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016665074486996165,
+ "loss": 1.1112,
+ "step": 2238
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001666225904532352,
+ "loss": 1.3633,
+ "step": 2239
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016659442653758064,
+ "loss": 1.444,
+ "step": 2240
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016656625312701348,
+ "loss": 0.8248,
+ "step": 2241
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016653807022555067,
+ "loss": 1.2522,
+ "step": 2242
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001665098778372104,
+ "loss": 1.2107,
+ "step": 2243
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001664816759660124,
+ "loss": 1.0813,
+ "step": 2244
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016645346461597753,
+ "loss": 1.1136,
+ "step": 2245
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016642524379112817,
+ "loss": 1.1003,
+ "step": 2246
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.000166397013495488,
+ "loss": 1.0635,
+ "step": 2247
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016636877373308204,
+ "loss": 1.0575,
+ "step": 2248
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016634052450793663,
+ "loss": 0.7693,
+ "step": 2249
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016631226582407952,
+ "loss": 1.5965,
+ "step": 2250
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001662839976855398,
+ "loss": 1.0989,
+ "step": 2251
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016625572009634787,
+ "loss": 0.9198,
+ "step": 2252
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016622743306053548,
+ "loss": 1.0896,
+ "step": 2253
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016619913658213578,
+ "loss": 1.015,
+ "step": 2254
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.0001661708306651832,
+ "loss": 0.8572,
+ "step": 2255
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016614251531371353,
+ "loss": 1.1508,
+ "step": 2256
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.000166114190531764,
+ "loss": 1.1852,
+ "step": 2257
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016608585632337306,
+ "loss": 0.932,
+ "step": 2258
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016605751269258053,
+ "loss": 1.2542,
+ "step": 2259
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016602915964342757,
+ "loss": 0.943,
+ "step": 2260
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016600079717995678,
+ "loss": 1.2438,
+ "step": 2261
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016597242530621203,
+ "loss": 0.9928,
+ "step": 2262
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016594404402623845,
+ "loss": 0.9516,
+ "step": 2263
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016591565334408265,
+ "loss": 1.1689,
+ "step": 2264
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.0001658872532637925,
+ "loss": 1.3155,
+ "step": 2265
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016585884378941725,
+ "loss": 1.1596,
+ "step": 2266
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016583042492500746,
+ "loss": 0.9956,
+ "step": 2267
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016580199667461508,
+ "loss": 0.9289,
+ "step": 2268
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016577355904229325,
+ "loss": 1.3225,
+ "step": 2269
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016574511203209667,
+ "loss": 1.0384,
+ "step": 2270
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001657166556480812,
+ "loss": 0.697,
+ "step": 2271
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016568818989430416,
+ "loss": 0.7702,
+ "step": 2272
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016565971477482404,
+ "loss": 1.1041,
+ "step": 2273
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016563123029370093,
+ "loss": 1.0462,
+ "step": 2274
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001656027364549959,
+ "loss": 1.0797,
+ "step": 2275
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001655742332627717,
+ "loss": 1.3301,
+ "step": 2276
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.0001655457207210922,
+ "loss": 1.0467,
+ "step": 2277
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016551719883402271,
+ "loss": 0.9432,
+ "step": 2278
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016548866760562978,
+ "loss": 1.1808,
+ "step": 2279
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016546012703998138,
+ "loss": 1.1094,
+ "step": 2280
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016543157714114673,
+ "loss": 1.3914,
+ "step": 2281
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016540301791319645,
+ "loss": 1.0402,
+ "step": 2282
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016537444936020246,
+ "loss": 0.9815,
+ "step": 2283
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.000165345871486238,
+ "loss": 0.9722,
+ "step": 2284
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016531728429537766,
+ "loss": 0.919,
+ "step": 2285
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016528868779169738,
+ "loss": 1.1242,
+ "step": 2286
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016526008197927436,
+ "loss": 1.1794,
+ "step": 2287
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016523146686218718,
+ "loss": 1.434,
+ "step": 2288
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016520284244451574,
+ "loss": 0.8463,
+ "step": 2289
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016517420873034123,
+ "loss": 1.1736,
+ "step": 2290
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.0001651455657237462,
+ "loss": 1.0431,
+ "step": 2291
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016511691342881453,
+ "loss": 1.2796,
+ "step": 2292
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650882518496314,
+ "loss": 1.0578,
+ "step": 2293
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016505958099028334,
+ "loss": 1.3914,
+ "step": 2294
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650309008548582,
+ "loss": 1.0046,
+ "step": 2295
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650022114474451,
+ "loss": 1.0246,
+ "step": 2296
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016497351277213458,
+ "loss": 1.2789,
+ "step": 2297
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016494480483301836,
+ "loss": 1.0036,
+ "step": 2298
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016491608763418968,
+ "loss": 0.886,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "eval_loss": 2.3017475605010986,
+ "eval_runtime": 283.8846,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648873611797429,
+ "loss": 1.3953,
+ "step": 2300
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648586254737738,
+ "loss": 0.6972,
+ "step": 2301
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016482988052037947,
+ "loss": 1.2311,
+ "step": 2302
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016480112632365833,
+ "loss": 1.327,
+ "step": 2303
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647723628877101,
+ "loss": 0.9534,
+ "step": 2304
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647435902166358,
+ "loss": 0.9164,
+ "step": 2305
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647148083145378,
+ "loss": 1.1038,
+ "step": 2306
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016468601718551976,
+ "loss": 1.0444,
+ "step": 2307
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016465721683368666,
+ "loss": 1.2635,
+ "step": 2308
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016462840726314486,
+ "loss": 1.1647,
+ "step": 2309
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016459958847800187,
+ "loss": 1.3617,
+ "step": 2310
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016457076048236675,
+ "loss": 1.2355,
+ "step": 2311
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016454192328034962,
+ "loss": 0.9989,
+ "step": 2312
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016451307687606213,
+ "loss": 1.1218,
+ "step": 2313
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016448422127361706,
+ "loss": 0.8967,
+ "step": 2314
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644553564771287,
+ "loss": 1.159,
+ "step": 2315
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644264824907124,
+ "loss": 1.5901,
+ "step": 2316
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001643975993184851,
+ "loss": 0.979,
+ "step": 2317
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016436870696456482,
+ "loss": 0.8561,
+ "step": 2318
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016433980543307107,
+ "loss": 0.9485,
+ "step": 2319
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016431089472812444,
+ "loss": 0.7736,
+ "step": 2320
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016428197485384707,
+ "loss": 1.2546,
+ "step": 2321
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016425304581436226,
+ "loss": 0.9534,
+ "step": 2322
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001642241076137947,
+ "loss": 0.8182,
+ "step": 2323
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641951602562703,
+ "loss": 1.1107,
+ "step": 2324
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641662037459164,
+ "loss": 1.0628,
+ "step": 2325
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016413723808686147,
+ "loss": 1.6261,
+ "step": 2326
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001641082632832354,
+ "loss": 1.0286,
+ "step": 2327
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001640792793391694,
+ "loss": 0.5732,
+ "step": 2328
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016405028625879594,
+ "loss": 1.0932,
+ "step": 2329
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016402128404624882,
+ "loss": 1.2585,
+ "step": 2330
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016399227270566308,
+ "loss": 0.8788,
+ "step": 2331
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639632522411751,
+ "loss": 1.1397,
+ "step": 2332
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016393422265692262,
+ "loss": 1.3517,
+ "step": 2333
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639051839570446,
+ "loss": 1.1346,
+ "step": 2334
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016387613614568126,
+ "loss": 0.9594,
+ "step": 2335
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001638470792269743,
+ "loss": 1.0674,
+ "step": 2336
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016381801320506653,
+ "loss": 0.9123,
+ "step": 2337
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016378893808410215,
+ "loss": 1.1909,
+ "step": 2338
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016375985386822664,
+ "loss": 1.0474,
+ "step": 2339
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016373076056158675,
+ "loss": 0.8844,
+ "step": 2340
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001637016581683306,
+ "loss": 1.1606,
+ "step": 2341
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016367254669260749,
+ "loss": 0.6206,
+ "step": 2342
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016364342613856816,
+ "loss": 0.7225,
+ "step": 2343
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016361429651036446,
+ "loss": 1.1782,
+ "step": 2344
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016358515781214977,
+ "loss": 1.0911,
+ "step": 2345
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016355601004807856,
+ "loss": 1.2727,
+ "step": 2346
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016352685322230663,
+ "loss": 0.8294,
+ "step": 2347
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016349768733899117,
+ "loss": 1.1661,
+ "step": 2348
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016346851240229057,
+ "loss": 0.8267,
+ "step": 2349
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016343932841636456,
+ "loss": 1.2873,
+ "step": 2350
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016341013538537412,
+ "loss": 1.2459,
+ "step": 2351
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016338093331348156,
+ "loss": 0.8939,
+ "step": 2352
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016335172220485042,
+ "loss": 1.024,
+ "step": 2353
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.0001633225020636456,
+ "loss": 0.9981,
+ "step": 2354
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016329327289403325,
+ "loss": 1.331,
+ "step": 2355
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016326403470018084,
+ "loss": 0.7446,
+ "step": 2356
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016323478748625703,
+ "loss": 1.1931,
+ "step": 2357
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016320553125643187,
+ "loss": 1.1287,
+ "step": 2358
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016317626601487667,
+ "loss": 1.109,
+ "step": 2359
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016314699176576402,
+ "loss": 0.9946,
+ "step": 2360
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016311770851326778,
+ "loss": 0.8347,
+ "step": 2361
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016308841626156307,
+ "loss": 0.9214,
+ "step": 2362
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.0001630591150148264,
+ "loss": 0.5907,
+ "step": 2363
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016302980477723539,
+ "loss": 1.2412,
+ "step": 2364
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016300048555296915,
+ "loss": 1.2908,
+ "step": 2365
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016297115734620788,
+ "loss": 1.2345,
+ "step": 2366
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016294182016113315,
+ "loss": 1.0418,
+ "step": 2367
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016291247400192785,
+ "loss": 1.1457,
+ "step": 2368
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016288311887277608,
+ "loss": 1.2529,
+ "step": 2369
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016285375477786322,
+ "loss": 1.0013,
+ "step": 2370
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016282438172137597,
+ "loss": 0.943,
+ "step": 2371
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016279499970750226,
+ "loss": 0.7009,
+ "step": 2372
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016276560874043137,
+ "loss": 0.9408,
+ "step": 2373
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627362088243538,
+ "loss": 1.1788,
+ "step": 2374
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627067999634613,
+ "loss": 0.8106,
+ "step": 2375
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016267738216194696,
+ "loss": 1.1695,
+ "step": 2376
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001626479554240051,
+ "loss": 0.9209,
+ "step": 2377
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016261851975383137,
+ "loss": 0.9911,
+ "step": 2378
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016258907515562262,
+ "loss": 1.3819,
+ "step": 2379
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162559621633577,
+ "loss": 0.8926,
+ "step": 2380
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162530159191894,
+ "loss": 1.0896,
+ "step": 2381
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016250068783477424,
+ "loss": 0.8403,
+ "step": 2382
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016247120756641972,
+ "loss": 0.7976,
+ "step": 2383
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001624417183910337,
+ "loss": 0.8881,
+ "step": 2384
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001624122203128207,
+ "loss": 0.8302,
+ "step": 2385
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623827133359865,
+ "loss": 1.3312,
+ "step": 2386
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623531974647381,
+ "loss": 1.003,
+ "step": 2387
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623236727032839,
+ "loss": 0.9487,
+ "step": 2388
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016229413905583342,
+ "loss": 1.2259,
+ "step": 2389
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016226459652659753,
+ "loss": 0.9327,
+ "step": 2390
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016223504511978838,
+ "loss": 0.7336,
+ "step": 2391
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016220548483961934,
+ "loss": 1.0454,
+ "step": 2392
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016217591569030505,
+ "loss": 1.3371,
+ "step": 2393
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016214633767606143,
+ "loss": 1.0814,
+ "step": 2394
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016211675080110566,
+ "loss": 1.2274,
+ "step": 2395
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001620871550696562,
+ "loss": 0.9775,
+ "step": 2396
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016205755048593273,
+ "loss": 1.0323,
+ "step": 2397
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016202793705415622,
+ "loss": 1.5101,
+ "step": 2398
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016199831477854893,
+ "loss": 0.8118,
+ "step": 2399
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001619686836633343,
+ "loss": 1.0233,
+ "step": 2400
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016193904371273715,
+ "loss": 0.9038,
+ "step": 2401
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016190939493098344,
+ "loss": 0.875,
+ "step": 2402
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016187973732230038,
+ "loss": 1.3274,
+ "step": 2403
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016185007089091665,
+ "loss": 1.081,
+ "step": 2404
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016182039564106192,
+ "loss": 1.0841,
+ "step": 2405
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016179071157696728,
+ "loss": 1.3208,
+ "step": 2406
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.000161761018702865,
+ "loss": 1.1854,
+ "step": 2407
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617313170229887,
+ "loss": 1.0651,
+ "step": 2408
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617016065415731,
+ "loss": 1.1398,
+ "step": 2409
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016167188726285434,
+ "loss": 1.2778,
+ "step": 2410
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016164215919106968,
+ "loss": 1.6758,
+ "step": 2411
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.0001616124223304577,
+ "loss": 0.8341,
+ "step": 2412
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016158267668525832,
+ "loss": 0.9513,
+ "step": 2413
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016155292225971253,
+ "loss": 0.9617,
+ "step": 2414
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016152315905806268,
+ "loss": 0.8664,
+ "step": 2415
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016149338708455237,
+ "loss": 1.331,
+ "step": 2416
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016146360634342643,
+ "loss": 1.4212,
+ "step": 2417
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016143381683893094,
+ "loss": 1.2126,
+ "step": 2418
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016140401857531322,
+ "loss": 0.934,
+ "step": 2419
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016137421155682183,
+ "loss": 1.2417,
+ "step": 2420
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001613443957877067,
+ "loss": 1.637,
+ "step": 2421
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016131457127221881,
+ "loss": 1.1456,
+ "step": 2422
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016128473801461053,
+ "loss": 0.9402,
+ "step": 2423
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612548960191354,
+ "loss": 1.3797,
+ "step": 2424
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612250452900483,
+ "loss": 0.8191,
+ "step": 2425
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001611951858316052,
+ "loss": 1.1725,
+ "step": 2426
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016116531764806346,
+ "loss": 1.5701,
+ "step": 2427
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016113544074368164,
+ "loss": 1.0591,
+ "step": 2428
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016110555512271953,
+ "loss": 1.03,
+ "step": 2429
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.0001610756607894382,
+ "loss": 1.1829,
+ "step": 2430
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016104575774809985,
+ "loss": 1.2222,
+ "step": 2431
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016101584600296804,
+ "loss": 1.1537,
+ "step": 2432
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016098592555830753,
+ "loss": 1.0973,
+ "step": 2433
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016095599641838436,
+ "loss": 1.0793,
+ "step": 2434
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016092605858746573,
+ "loss": 1.3484,
+ "step": 2435
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608961120698201,
+ "loss": 1.1689,
+ "step": 2436
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016086615686971726,
+ "loss": 1.0864,
+ "step": 2437
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016083619299142813,
+ "loss": 1.2451,
+ "step": 2438
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608062204392249,
+ "loss": 0.9593,
+ "step": 2439
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016077623921738102,
+ "loss": 0.9816,
+ "step": 2440
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016074624933017112,
+ "loss": 1.0845,
+ "step": 2441
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016071625078187114,
+ "loss": 0.9875,
+ "step": 2442
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001606862435767582,
+ "loss": 0.8758,
+ "step": 2443
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016065622771911067,
+ "loss": 0.9499,
+ "step": 2444
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016062620321320823,
+ "loss": 1.1133,
+ "step": 2445
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605961700633316,
+ "loss": 0.7228,
+ "step": 2446
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016056612827376293,
+ "loss": 1.2297,
+ "step": 2447
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605360778487855,
+ "loss": 1.0251,
+ "step": 2448
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016050601879268386,
+ "loss": 0.8097,
+ "step": 2449
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016047595110974376,
+ "loss": 0.9872,
+ "step": 2450
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001604458748042522,
+ "loss": 1.1119,
+ "step": 2451
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.0001604157898804974,
+ "loss": 0.8256,
+ "step": 2452
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016038569634276882,
+ "loss": 0.9036,
+ "step": 2453
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016035559419535716,
+ "loss": 1.1173,
+ "step": 2454
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016032548344255428,
+ "loss": 1.3173,
+ "step": 2455
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016029536408865337,
+ "loss": 0.717,
+ "step": 2456
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016026523613794878,
+ "loss": 0.9806,
+ "step": 2457
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016023509959473605,
+ "loss": 1.1509,
+ "step": 2458
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016020495446331207,
+ "loss": 1.0454,
+ "step": 2459
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601748007479748,
+ "loss": 1.183,
+ "step": 2460
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601446384530236,
+ "loss": 1.2611,
+ "step": 2461
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016011446758275888,
+ "loss": 1.0377,
+ "step": 2462
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016008428814148236,
+ "loss": 1.2111,
+ "step": 2463
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016005410013349698,
+ "loss": 1.0952,
+ "step": 2464
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016002390356310685,
+ "loss": 0.7589,
+ "step": 2465
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015999369843461742,
+ "loss": 0.8543,
+ "step": 2466
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015996348475233525,
+ "loss": 1.1509,
+ "step": 2467
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001599332625205681,
+ "loss": 1.287,
+ "step": 2468
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015990303174362512,
+ "loss": 1.0401,
+ "step": 2469
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001598727924258164,
+ "loss": 1.0247,
+ "step": 2470
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015984254457145354,
+ "loss": 1.1537,
+ "step": 2471
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015981228818484917,
+ "loss": 0.9606,
+ "step": 2472
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001597820232703172,
+ "loss": 0.8709,
+ "step": 2473
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015975174983217275,
+ "loss": 1.2827,
+ "step": 2474
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015972146787473213,
+ "loss": 0.8057,
+ "step": 2475
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001596911774023129,
+ "loss": 1.0857,
+ "step": 2476
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015966087841923386,
+ "loss": 1.1731,
+ "step": 2477
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001596305709298149,
+ "loss": 0.8871,
+ "step": 2478
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015960025493837727,
+ "loss": 1.0671,
+ "step": 2479
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015956993044924334,
+ "loss": 1.3735,
+ "step": 2480
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015953959746673675,
+ "loss": 1.4655,
+ "step": 2481
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015950925599518228,
+ "loss": 1.3975,
+ "step": 2482
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015947890603890602,
+ "loss": 0.9468,
+ "step": 2483
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001594485476022352,
+ "loss": 0.9976,
+ "step": 2484
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015941818068949818,
+ "loss": 0.6732,
+ "step": 2485
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015938780530502474,
+ "loss": 0.9848,
+ "step": 2486
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015935742145314568,
+ "loss": 1.2441,
+ "step": 2487
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001593270291381931,
+ "loss": 0.9631,
+ "step": 2488
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015929662836450029,
+ "loss": 0.8868,
+ "step": 2489
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592662191364017,
+ "loss": 0.9063,
+ "step": 2490
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015923580145823303,
+ "loss": 0.6886,
+ "step": 2491
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592053753343312,
+ "loss": 1.0702,
+ "step": 2492
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591749407690343,
+ "loss": 1.3879,
+ "step": 2493
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015914449776668167,
+ "loss": 1.1048,
+ "step": 2494
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591140463316137,
+ "loss": 0.9921,
+ "step": 2495
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015908358646817225,
+ "loss": 1.3042,
+ "step": 2496
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015905311818070015,
+ "loss": 0.8413,
+ "step": 2497
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015902264147354153,
+ "loss": 1.5201,
+ "step": 2498
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589921563510417,
+ "loss": 1.0727,
+ "step": 2499
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589616628175472,
+ "loss": 1.0439,
+ "step": 2500
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589311608774057,
+ "loss": 1.2308,
+ "step": 2501
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015890065053496613,
+ "loss": 1.1155,
+ "step": 2502
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015887013179457862,
+ "loss": 1.3345,
+ "step": 2503
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015883960466059444,
+ "loss": 0.9551,
+ "step": 2504
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001588090691373661,
+ "loss": 1.0713,
+ "step": 2505
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015877852522924732,
+ "loss": 1.299,
+ "step": 2506
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.000158747972940593,
+ "loss": 0.8535,
+ "step": 2507
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001587174122757592,
+ "loss": 0.9924,
+ "step": 2508
+ },
+ {
+ "epoch": 2.98,
+ "eval_loss": 2.328662395477295,
+ "eval_runtime": 283.7765,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2508
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001586868432391032,
+ "loss": 1.0512,
+ "step": 2509
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015865626583498355,
+ "loss": 1.2775,
+ "step": 2510
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015862568006775983,
+ "loss": 0.7054,
+ "step": 2511
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015859508594179294,
+ "loss": 0.8524,
+ "step": 2512
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015856448346144496,
+ "loss": 0.9871,
+ "step": 2513
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015853387263107909,
+ "loss": 0.8642,
+ "step": 2514
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015850325345505975,
+ "loss": 1.1789,
+ "step": 2515
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015847262593775266,
+ "loss": 1.2765,
+ "step": 2516
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015844199008352458,
+ "loss": 0.6272,
+ "step": 2517
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015841134589674352,
+ "loss": 1.3037,
+ "step": 2518
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015838069338177863,
+ "loss": 1.054,
+ "step": 2519
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015835003254300039,
+ "loss": 1.1942,
+ "step": 2520
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015831936338478025,
+ "loss": 0.8866,
+ "step": 2521
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015828868591149104,
+ "loss": 1.1444,
+ "step": 2522
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015825800012750666,
+ "loss": 0.8597,
+ "step": 2523
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001582273060372023,
+ "loss": 0.7731,
+ "step": 2524
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015819660364495416,
+ "loss": 1.1953,
+ "step": 2525
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001581658929551398,
+ "loss": 1.3946,
+ "step": 2526
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015813517397213791,
+ "loss": 1.0173,
+ "step": 2527
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015810444670032831,
+ "loss": 1.1762,
+ "step": 2528
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015807371114409202,
+ "loss": 0.7283,
+ "step": 2529
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015804296730781135,
+ "loss": 1.1515,
+ "step": 2530
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015801221519586958,
+ "loss": 0.9389,
+ "step": 2531
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001579814548126514,
+ "loss": 1.1869,
+ "step": 2532
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015795068616254247,
+ "loss": 1.2957,
+ "step": 2533
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015791990924992981,
+ "loss": 1.0514,
+ "step": 2534
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015788912407920148,
+ "loss": 0.6762,
+ "step": 2535
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015785833065474683,
+ "loss": 0.4121,
+ "step": 2536
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015782752898095627,
+ "loss": 0.4532,
+ "step": 2537
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001577967190622215,
+ "loss": 0.4847,
+ "step": 2538
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001577659009029353,
+ "loss": 0.8313,
+ "step": 2539
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015773507450749172,
+ "loss": 0.5304,
+ "step": 2540
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015770423988028588,
+ "loss": 0.6003,
+ "step": 2541
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015767339702571414,
+ "loss": 0.3988,
+ "step": 2542
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015764254594817398,
+ "loss": 0.6133,
+ "step": 2543
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001576116866520642,
+ "loss": 0.4858,
+ "step": 2544
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015758081914178456,
+ "loss": 0.3691,
+ "step": 2545
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001575499434217361,
+ "loss": 0.5441,
+ "step": 2546
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001575190594963211,
+ "loss": 0.4605,
+ "step": 2547
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015748816736994284,
+ "loss": 0.3681,
+ "step": 2548
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015745726704700593,
+ "loss": 0.4113,
+ "step": 2549
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015742635853191608,
+ "loss": 0.5233,
+ "step": 2550
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015739544182908014,
+ "loss": 0.356,
+ "step": 2551
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015736451694290616,
+ "loss": 0.4105,
+ "step": 2552
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015733358387780337,
+ "loss": 0.4451,
+ "step": 2553
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015730264263818212,
+ "loss": 0.5023,
+ "step": 2554
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.000157271693228454,
+ "loss": 0.3671,
+ "step": 2555
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001572407356530317,
+ "loss": 0.7077,
+ "step": 2556
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015720976991632913,
+ "loss": 0.4439,
+ "step": 2557
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015717879602276122,
+ "loss": 0.5961,
+ "step": 2558
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001571478139767443,
+ "loss": 0.4269,
+ "step": 2559
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015711682378269565,
+ "loss": 0.3427,
+ "step": 2560
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015708582544503386,
+ "loss": 0.5736,
+ "step": 2561
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015705481896817854,
+ "loss": 0.3707,
+ "step": 2562
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001570238043565506,
+ "loss": 0.4076,
+ "step": 2563
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.000156992781614572,
+ "loss": 0.6514,
+ "step": 2564
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015696175074666598,
+ "loss": 0.4012,
+ "step": 2565
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.0001569307117572568,
+ "loss": 0.3492,
+ "step": 2566
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015689966465076992,
+ "loss": 0.4121,
+ "step": 2567
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015686860943163206,
+ "loss": 0.5769,
+ "step": 2568
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015683754610427094,
+ "loss": 0.4872,
+ "step": 2569
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015680647467311557,
+ "loss": 0.5518,
+ "step": 2570
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015677539514259608,
+ "loss": 0.411,
+ "step": 2571
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015674430751714361,
+ "loss": 0.3443,
+ "step": 2572
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015671321180119074,
+ "loss": 0.3706,
+ "step": 2573
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0001566821079991709,
+ "loss": 0.6168,
+ "step": 2574
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0001566509961155189,
+ "loss": 0.3726,
+ "step": 2575
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015661987615467058,
+ "loss": 0.3976,
+ "step": 2576
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015658874812106297,
+ "loss": 0.3697,
+ "step": 2577
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015655761201913425,
+ "loss": 0.2759,
+ "step": 2578
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015652646785332378,
+ "loss": 0.3572,
+ "step": 2579
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.000156495315628072,
+ "loss": 0.5333,
+ "step": 2580
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015646415534782056,
+ "loss": 0.4004,
+ "step": 2581
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.0001564329870170122,
+ "loss": 0.4736,
+ "step": 2582
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015640181064009088,
+ "loss": 0.4814,
+ "step": 2583
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015637062622150168,
+ "loss": 0.3351,
+ "step": 2584
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015633943376569081,
+ "loss": 0.4497,
+ "step": 2585
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015630823327710558,
+ "loss": 0.4202,
+ "step": 2586
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015627702476019457,
+ "loss": 0.5934,
+ "step": 2587
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.0001562458082194074,
+ "loss": 0.4664,
+ "step": 2588
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015621458365919487,
+ "loss": 0.4077,
+ "step": 2589
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015618335108400893,
+ "loss": 0.5244,
+ "step": 2590
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015615211049830268,
+ "loss": 0.5042,
+ "step": 2591
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015612086190653027,
+ "loss": 0.3442,
+ "step": 2592
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015608960531314717,
+ "loss": 0.6337,
+ "step": 2593
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015605834072260984,
+ "loss": 0.3542,
+ "step": 2594
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001560270681393759,
+ "loss": 0.5113,
+ "step": 2595
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001559957875679042,
+ "loss": 0.4346,
+ "step": 2596
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015596449901265463,
+ "loss": 0.5231,
+ "step": 2597
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015593320247808822,
+ "loss": 0.5193,
+ "step": 2598
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001559018979686673,
+ "loss": 0.3575,
+ "step": 2599
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015587058548885505,
+ "loss": 0.6356,
+ "step": 2600
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015583926504311605,
+ "loss": 0.3313,
+ "step": 2601
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015580793663591585,
+ "loss": 0.356,
+ "step": 2602
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015577660027172127,
+ "loss": 0.5498,
+ "step": 2603
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001557452559550001,
+ "loss": 0.3973,
+ "step": 2604
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001557139036902215,
+ "loss": 0.4751,
+ "step": 2605
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015568254348185544,
+ "loss": 0.4297,
+ "step": 2606
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015565117533437335,
+ "loss": 0.4299,
+ "step": 2607
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015561979925224754,
+ "loss": 0.4651,
+ "step": 2608
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015558841523995162,
+ "loss": 0.474,
+ "step": 2609
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015555702330196023,
+ "loss": 0.4143,
+ "step": 2610
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.0001555256234427492,
+ "loss": 0.393,
+ "step": 2611
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015549421566679546,
+ "loss": 0.3738,
+ "step": 2612
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015546279997857704,
+ "loss": 0.4394,
+ "step": 2613
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.0001554313763825732,
+ "loss": 0.3702,
+ "step": 2614
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015539994488326418,
+ "loss": 0.4594,
+ "step": 2615
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015536850548513147,
+ "loss": 0.3249,
+ "step": 2616
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015533705819265764,
+ "loss": 0.3857,
+ "step": 2617
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0001553056030103264,
+ "loss": 0.3272,
+ "step": 2618
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015527413994262257,
+ "loss": 0.5204,
+ "step": 2619
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015524266899403206,
+ "loss": 0.3653,
+ "step": 2620
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.000155211190169042,
+ "loss": 0.4698,
+ "step": 2621
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0001551797034721405,
+ "loss": 0.5949,
+ "step": 2622
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015514820890781693,
+ "loss": 0.4074,
+ "step": 2623
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015511670648056178,
+ "loss": 0.3586,
+ "step": 2624
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.0001550851961948665,
+ "loss": 0.6494,
+ "step": 2625
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015505367805522383,
+ "loss": 0.4914,
+ "step": 2626
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.0001550221520661276,
+ "loss": 0.4594,
+ "step": 2627
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015499061823207266,
+ "loss": 0.4102,
+ "step": 2628
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015495907655755506,
+ "loss": 0.4229,
+ "step": 2629
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.000154927527047072,
+ "loss": 0.7218,
+ "step": 2630
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001548959697051217,
+ "loss": 0.6929,
+ "step": 2631
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015486440453620358,
+ "loss": 0.3628,
+ "step": 2632
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015483283154481815,
+ "loss": 0.4433,
+ "step": 2633
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015480125073546704,
+ "loss": 0.3912,
+ "step": 2634
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001547696621126529,
+ "loss": 0.3682,
+ "step": 2635
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015473806568087968,
+ "loss": 0.354,
+ "step": 2636
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001547064614446523,
+ "loss": 0.4789,
+ "step": 2637
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001546748494084768,
+ "loss": 0.382,
+ "step": 2638
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015464322957686041,
+ "loss": 0.4954,
+ "step": 2639
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015461160195431148,
+ "loss": 0.3273,
+ "step": 2640
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001545799665453393,
+ "loss": 0.3414,
+ "step": 2641
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015454832335445447,
+ "loss": 0.5479,
+ "step": 2642
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001545166723861686,
+ "loss": 0.4963,
+ "step": 2643
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015448501364499445,
+ "loss": 0.5547,
+ "step": 2644
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001544533471354458,
+ "loss": 0.4637,
+ "step": 2645
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015442167286203767,
+ "loss": 0.4248,
+ "step": 2646
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015438999082928608,
+ "loss": 0.4213,
+ "step": 2647
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015435830104170822,
+ "loss": 0.3734,
+ "step": 2648
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015432660350382234,
+ "loss": 0.4627,
+ "step": 2649
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.0001542948982201479,
+ "loss": 0.3422,
+ "step": 2650
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015426318519520525,
+ "loss": 0.4409,
+ "step": 2651
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015423146443351607,
+ "loss": 0.3717,
+ "step": 2652
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015419973593960298,
+ "loss": 0.4349,
+ "step": 2653
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015416799971798985,
+ "loss": 0.5349,
+ "step": 2654
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.0001541362557732015,
+ "loss": 0.4511,
+ "step": 2655
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.000154104504109764,
+ "loss": 0.5997,
+ "step": 2656
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015407274473220434,
+ "loss": 0.661,
+ "step": 2657
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015404097764505083,
+ "loss": 0.3456,
+ "step": 2658
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015400920285283268,
+ "loss": 0.3416,
+ "step": 2659
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015397742036008034,
+ "loss": 0.4707,
+ "step": 2660
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015394563017132526,
+ "loss": 0.3221,
+ "step": 2661
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015391383229110007,
+ "loss": 0.6108,
+ "step": 2662
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015388202672393834,
+ "loss": 0.5504,
+ "step": 2663
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015385021347437498,
+ "loss": 0.3973,
+ "step": 2664
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015381839254694583,
+ "loss": 0.5149,
+ "step": 2665
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015378656394618787,
+ "loss": 0.5853,
+ "step": 2666
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0001537547276766391,
+ "loss": 0.517,
+ "step": 2667
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015372288374283875,
+ "loss": 0.5485,
+ "step": 2668
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015369103214932703,
+ "loss": 0.4907,
+ "step": 2669
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0001536591729006453,
+ "loss": 0.3169,
+ "step": 2670
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015362730600133596,
+ "loss": 0.5431,
+ "step": 2671
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015359543145594258,
+ "loss": 0.2586,
+ "step": 2672
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015356354926900979,
+ "loss": 0.5251,
+ "step": 2673
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015353165944508325,
+ "loss": 0.4104,
+ "step": 2674
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015349976198870973,
+ "loss": 0.4825,
+ "step": 2675
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015346785690443718,
+ "loss": 0.5274,
+ "step": 2676
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.0001534359441968145,
+ "loss": 0.3878,
+ "step": 2677
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.0001534040238703918,
+ "loss": 0.5132,
+ "step": 2678
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015337209592972023,
+ "loss": 0.5145,
+ "step": 2679
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015334016037935196,
+ "loss": 0.5548,
+ "step": 2680
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015330821722384037,
+ "loss": 0.7494,
+ "step": 2681
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015327626646773976,
+ "loss": 0.5569,
+ "step": 2682
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015324430811560573,
+ "loss": 0.2622,
+ "step": 2683
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001532123421719948,
+ "loss": 0.3749,
+ "step": 2684
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015318036864146457,
+ "loss": 0.3959,
+ "step": 2685
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001531483875285738,
+ "loss": 0.5243,
+ "step": 2686
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001531163988378823,
+ "loss": 0.3115,
+ "step": 2687
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015308440257395093,
+ "loss": 0.2385,
+ "step": 2688
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015305239874134174,
+ "loss": 0.4431,
+ "step": 2689
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.0001530203873446177,
+ "loss": 0.378,
+ "step": 2690
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015298836838834298,
+ "loss": 0.4521,
+ "step": 2691
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015295634187708279,
+ "loss": 0.6309,
+ "step": 2692
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015292430781540335,
+ "loss": 0.4355,
+ "step": 2693
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015289226620787208,
+ "loss": 0.4537,
+ "step": 2694
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.0001528602170590574,
+ "loss": 0.4305,
+ "step": 2695
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015282816037352878,
+ "loss": 0.5355,
+ "step": 2696
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015279609615585687,
+ "loss": 0.5243,
+ "step": 2697
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001527640244106133,
+ "loss": 0.5334,
+ "step": 2698
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015273194514237078,
+ "loss": 0.5409,
+ "step": 2699
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001526998583557031,
+ "loss": 0.4042,
+ "step": 2700
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015266776405518524,
+ "loss": 0.5536,
+ "step": 2701
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.000152635662245393,
+ "loss": 0.2743,
+ "step": 2702
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015260355293090353,
+ "loss": 0.4762,
+ "step": 2703
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015257143611629482,
+ "loss": 0.4552,
+ "step": 2704
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001525393118061461,
+ "loss": 0.5395,
+ "step": 2705
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001525071800050375,
+ "loss": 0.4297,
+ "step": 2706
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015247504071755046,
+ "loss": 0.364,
+ "step": 2707
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015244289394826722,
+ "loss": 0.9499,
+ "step": 2708
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015241073970177126,
+ "loss": 0.579,
+ "step": 2709
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.000152378577982647,
+ "loss": 0.3111,
+ "step": 2710
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001523464087954801,
+ "loss": 0.3345,
+ "step": 2711
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015231423214485715,
+ "loss": 0.4628,
+ "step": 2712
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015228204803536586,
+ "loss": 0.4803,
+ "step": 2713
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001522498564715949,
+ "loss": 0.4164,
+ "step": 2714
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015221765745813417,
+ "loss": 0.6468,
+ "step": 2715
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015218545099957449,
+ "loss": 0.4495,
+ "step": 2716
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015215323710050785,
+ "loss": 0.4184,
+ "step": 2717
+ },
+ {
+ "epoch": 3.22,
+ "eval_loss": 2.9206559658050537,
+ "eval_runtime": 283.9002,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2717
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015212101576552722,
+ "loss": 0.4215,
+ "step": 2718
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015208878699922668,
+ "loss": 1.4488,
+ "step": 2719
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.0001520565508062013,
+ "loss": 0.4449,
+ "step": 2720
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.0001520243071910473,
+ "loss": 0.2853,
+ "step": 2721
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015199205615836191,
+ "loss": 0.4572,
+ "step": 2722
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015195979771274342,
+ "loss": 0.4436,
+ "step": 2723
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.0001519275318587912,
+ "loss": 0.38,
+ "step": 2724
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015189525860110563,
+ "loss": 0.4956,
+ "step": 2725
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015186297794428816,
+ "loss": 0.8514,
+ "step": 2726
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015183068989294132,
+ "loss": 0.4518,
+ "step": 2727
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015179839445166873,
+ "loss": 0.5581,
+ "step": 2728
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015176609162507497,
+ "loss": 0.5828,
+ "step": 2729
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015173378141776568,
+ "loss": 0.4109,
+ "step": 2730
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015170146383434768,
+ "loss": 0.5762,
+ "step": 2731
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015166913887942868,
+ "loss": 0.4502,
+ "step": 2732
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015163680655761757,
+ "loss": 0.3736,
+ "step": 2733
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015160446687352417,
+ "loss": 0.3771,
+ "step": 2734
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015157211983175947,
+ "loss": 0.469,
+ "step": 2735
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015153976543693542,
+ "loss": 0.665,
+ "step": 2736
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015150740369366508,
+ "loss": 0.3495,
+ "step": 2737
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.0001514750346065625,
+ "loss": 0.4513,
+ "step": 2738
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.0001514426581802428,
+ "loss": 0.4571,
+ "step": 2739
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015141027441932216,
+ "loss": 0.4197,
+ "step": 2740
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015137788332841785,
+ "loss": 0.3396,
+ "step": 2741
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015134548491214806,
+ "loss": 0.3547,
+ "step": 2742
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015131307917513214,
+ "loss": 0.3073,
+ "step": 2743
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015128066612199044,
+ "loss": 0.7091,
+ "step": 2744
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015124824575734428,
+ "loss": 0.2845,
+ "step": 2745
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015121581808581622,
+ "loss": 0.2903,
+ "step": 2746
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015118338311202964,
+ "loss": 0.4065,
+ "step": 2747
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015115094084060916,
+ "loss": 0.6152,
+ "step": 2748
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015111849127618022,
+ "loss": 0.5352,
+ "step": 2749
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.0001510860344233695,
+ "loss": 0.414,
+ "step": 2750
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015105357028680457,
+ "loss": 0.4756,
+ "step": 2751
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015102109887111422,
+ "loss": 0.4644,
+ "step": 2752
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015098862018092808,
+ "loss": 0.4231,
+ "step": 2753
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015095613422087692,
+ "loss": 0.4617,
+ "step": 2754
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.0001509236409955925,
+ "loss": 0.5876,
+ "step": 2755
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001508911405097077,
+ "loss": 0.5696,
+ "step": 2756
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015085863276785637,
+ "loss": 0.3826,
+ "step": 2757
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001508261177746734,
+ "loss": 0.4338,
+ "step": 2758
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001507935955347947,
+ "loss": 0.3546,
+ "step": 2759
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015076106605285724,
+ "loss": 0.413,
+ "step": 2760
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.000150728529333499,
+ "loss": 0.3954,
+ "step": 2761
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015069598538135906,
+ "loss": 0.5214,
+ "step": 2762
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001506634342010774,
+ "loss": 0.5239,
+ "step": 2763
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015063087579729519,
+ "loss": 0.8681,
+ "step": 2764
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015059831017465449,
+ "loss": 0.4616,
+ "step": 2765
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015056573733779848,
+ "loss": 0.4721,
+ "step": 2766
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015053315729137128,
+ "loss": 0.4449,
+ "step": 2767
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.0001505005700400182,
+ "loss": 0.569,
+ "step": 2768
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015046797558838535,
+ "loss": 0.4926,
+ "step": 2769
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015043537394112007,
+ "loss": 0.462,
+ "step": 2770
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015040276510287063,
+ "loss": 0.6983,
+ "step": 2771
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015037014907828632,
+ "loss": 0.4644,
+ "step": 2772
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001503375258720175,
+ "loss": 0.5924,
+ "step": 2773
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015030489548871544,
+ "loss": 0.5282,
+ "step": 2774
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015027225793303264,
+ "loss": 0.4757,
+ "step": 2775
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015023961320962247,
+ "loss": 0.5014,
+ "step": 2776
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001502069613231393,
+ "loss": 0.3455,
+ "step": 2777
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015017430227823864,
+ "loss": 0.4525,
+ "step": 2778
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001501416360795769,
+ "loss": 0.51,
+ "step": 2779
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015010896273181165,
+ "loss": 0.3766,
+ "step": 2780
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001500762822396013,
+ "loss": 0.3162,
+ "step": 2781
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00015004359460760546,
+ "loss": 0.406,
+ "step": 2782
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00015001089984048463,
+ "loss": 0.4671,
+ "step": 2783
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014997819794290034,
+ "loss": 0.4299,
+ "step": 2784
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014994548891951524,
+ "loss": 0.5494,
+ "step": 2785
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001499127727749929,
+ "loss": 0.351,
+ "step": 2786
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014988004951399785,
+ "loss": 0.3807,
+ "step": 2787
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014984731914119586,
+ "loss": 0.3999,
+ "step": 2788
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001498145816612534,
+ "loss": 0.7609,
+ "step": 2789
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014978183707883827,
+ "loss": 0.4466,
+ "step": 2790
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014974908539861905,
+ "loss": 0.592,
+ "step": 2791
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014971632662526545,
+ "loss": 0.4786,
+ "step": 2792
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014968356076344814,
+ "loss": 0.4087,
+ "step": 2793
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.0001496507878178388,
+ "loss": 0.3811,
+ "step": 2794
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014961800779311014,
+ "loss": 0.4091,
+ "step": 2795
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014958522069393593,
+ "loss": 0.6861,
+ "step": 2796
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014955242652499084,
+ "loss": 0.3346,
+ "step": 2797
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014951962529095064,
+ "loss": 0.5417,
+ "step": 2798
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.000149486816996492,
+ "loss": 0.7325,
+ "step": 2799
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014945400164629278,
+ "loss": 0.5007,
+ "step": 2800
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014942117924503164,
+ "loss": 0.4217,
+ "step": 2801
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014938834979738835,
+ "loss": 0.5265,
+ "step": 2802
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014935551330804372,
+ "loss": 0.4376,
+ "step": 2803
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.0001493226697816795,
+ "loss": 0.5068,
+ "step": 2804
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014928981922297842,
+ "loss": 0.6248,
+ "step": 2805
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001492569616366243,
+ "loss": 0.593,
+ "step": 2806
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001492240970273019,
+ "loss": 0.6713,
+ "step": 2807
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014919122539969697,
+ "loss": 0.5736,
+ "step": 2808
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014915834675849633,
+ "loss": 0.3006,
+ "step": 2809
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014912546110838775,
+ "loss": 0.5175,
+ "step": 2810
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014909256845405998,
+ "loss": 0.52,
+ "step": 2811
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014905966880020282,
+ "loss": 0.5491,
+ "step": 2812
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014902676215150702,
+ "loss": 0.6007,
+ "step": 2813
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001489938485126644,
+ "loss": 0.6552,
+ "step": 2814
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014896092788836763,
+ "loss": 0.3624,
+ "step": 2815
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001489280002833106,
+ "loss": 0.2626,
+ "step": 2816
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014889506570218796,
+ "loss": 0.409,
+ "step": 2817
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014886212414969553,
+ "loss": 0.473,
+ "step": 2818
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014882917563052998,
+ "loss": 0.4205,
+ "step": 2819
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014879622014938915,
+ "loss": 0.4603,
+ "step": 2820
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001487632577109717,
+ "loss": 0.3522,
+ "step": 2821
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001487302883199774,
+ "loss": 0.3787,
+ "step": 2822
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014869731198110695,
+ "loss": 0.6,
+ "step": 2823
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.000148664328699062,
+ "loss": 0.4291,
+ "step": 2824
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014863133847854533,
+ "loss": 0.4358,
+ "step": 2825
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.0001485983413242606,
+ "loss": 0.4144,
+ "step": 2826
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.0001485653372409125,
+ "loss": 0.842,
+ "step": 2827
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014853232623320662,
+ "loss": 0.3398,
+ "step": 2828
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014849930830584972,
+ "loss": 0.5005,
+ "step": 2829
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014846628346354933,
+ "loss": 0.5777,
+ "step": 2830
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014843325171101413,
+ "loss": 0.3953,
+ "step": 2831
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014840021305295373,
+ "loss": 0.4056,
+ "step": 2832
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014836716749407872,
+ "loss": 0.7682,
+ "step": 2833
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.0001483341150391006,
+ "loss": 0.3208,
+ "step": 2834
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014830105569273204,
+ "loss": 0.4317,
+ "step": 2835
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014826798945968654,
+ "loss": 0.363,
+ "step": 2836
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014823491634467862,
+ "loss": 0.3784,
+ "step": 2837
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014820183635242374,
+ "loss": 0.9267,
+ "step": 2838
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.0001481687494876385,
+ "loss": 0.4245,
+ "step": 2839
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014813565575504022,
+ "loss": 0.3929,
+ "step": 2840
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014810255515934747,
+ "loss": 0.5171,
+ "step": 2841
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014806944770527958,
+ "loss": 0.5181,
+ "step": 2842
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014803633339755703,
+ "loss": 0.4765,
+ "step": 2843
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014800321224090114,
+ "loss": 0.4433,
+ "step": 2844
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014797008424003428,
+ "loss": 0.461,
+ "step": 2845
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.0001479369493996798,
+ "loss": 0.5688,
+ "step": 2846
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014790380772456197,
+ "loss": 0.4822,
+ "step": 2847
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.0001478706592194061,
+ "loss": 0.4993,
+ "step": 2848
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014783750388893842,
+ "loss": 0.3967,
+ "step": 2849
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014780434173788617,
+ "loss": 0.4708,
+ "step": 2850
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014777117277097758,
+ "loss": 0.5721,
+ "step": 2851
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014773799699294176,
+ "loss": 0.5276,
+ "step": 2852
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014770481440850891,
+ "loss": 0.4135,
+ "step": 2853
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.0001476716250224101,
+ "loss": 0.716,
+ "step": 2854
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014763842883937743,
+ "loss": 0.3663,
+ "step": 2855
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014760522586414396,
+ "loss": 0.4105,
+ "step": 2856
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014757201610144372,
+ "loss": 0.4554,
+ "step": 2857
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014753879955601163,
+ "loss": 0.4366,
+ "step": 2858
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.0001475055762325837,
+ "loss": 0.3752,
+ "step": 2859
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014747234613589685,
+ "loss": 0.3747,
+ "step": 2860
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.000147439109270689,
+ "loss": 0.5533,
+ "step": 2861
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014740586564169892,
+ "loss": 0.4962,
+ "step": 2862
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014737261525366648,
+ "loss": 0.5318,
+ "step": 2863
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014733935811133244,
+ "loss": 0.4592,
+ "step": 2864
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014730609421943855,
+ "loss": 0.429,
+ "step": 2865
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014727282358272754,
+ "loss": 0.4163,
+ "step": 2866
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014723954620594304,
+ "loss": 0.4811,
+ "step": 2867
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0001472062620938297,
+ "loss": 0.4662,
+ "step": 2868
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014717297125113311,
+ "loss": 0.531,
+ "step": 2869
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0001471396736825998,
+ "loss": 0.3233,
+ "step": 2870
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014710636939297724,
+ "loss": 0.4171,
+ "step": 2871
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.000147073058387014,
+ "loss": 0.5412,
+ "step": 2872
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014703974066945943,
+ "loss": 0.4357,
+ "step": 2873
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014700641624506392,
+ "loss": 0.3889,
+ "step": 2874
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001469730851185788,
+ "loss": 0.456,
+ "step": 2875
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014693974729475636,
+ "loss": 0.4365,
+ "step": 2876
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001469064027783499,
+ "loss": 0.3947,
+ "step": 2877
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014687305157411355,
+ "loss": 0.5718,
+ "step": 2878
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001468396936868025,
+ "loss": 0.4652,
+ "step": 2879
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014680632912117286,
+ "loss": 0.4242,
+ "step": 2880
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.0001467729578819817,
+ "loss": 0.5045,
+ "step": 2881
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014673957997398695,
+ "loss": 0.4098,
+ "step": 2882
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014670619540194766,
+ "loss": 0.597,
+ "step": 2883
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014667280417062374,
+ "loss": 0.5208,
+ "step": 2884
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014663940628477598,
+ "loss": 0.4881,
+ "step": 2885
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014660600174916627,
+ "loss": 0.5234,
+ "step": 2886
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.0001465725905685573,
+ "loss": 0.439,
+ "step": 2887
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014653917274771284,
+ "loss": 0.4498,
+ "step": 2888
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014650574829139747,
+ "loss": 0.4837,
+ "step": 2889
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014647231720437686,
+ "loss": 0.4232,
+ "step": 2890
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014643887949141753,
+ "loss": 0.4467,
+ "step": 2891
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014640543515728695,
+ "loss": 0.3566,
+ "step": 2892
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014637198420675354,
+ "loss": 0.3888,
+ "step": 2893
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014633852664458673,
+ "loss": 0.326,
+ "step": 2894
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.0001463050624755568,
+ "loss": 0.3608,
+ "step": 2895
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014627159170443502,
+ "loss": 0.5326,
+ "step": 2896
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014623811433599359,
+ "loss": 0.3171,
+ "step": 2897
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014620463037500568,
+ "loss": 0.4619,
+ "step": 2898
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014617113982624526,
+ "loss": 0.7739,
+ "step": 2899
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014613764269448751,
+ "loss": 0.4327,
+ "step": 2900
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.0001461041389845083,
+ "loss": 0.6078,
+ "step": 2901
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014607062870108456,
+ "loss": 0.3863,
+ "step": 2902
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014603711184899408,
+ "loss": 0.4787,
+ "step": 2903
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014600358843301568,
+ "loss": 0.2997,
+ "step": 2904
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014597005845792905,
+ "loss": 0.3657,
+ "step": 2905
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014593652192851486,
+ "loss": 0.334,
+ "step": 2906
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014590297884955463,
+ "loss": 0.6809,
+ "step": 2907
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.0001458694292258309,
+ "loss": 0.4739,
+ "step": 2908
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014583587306212713,
+ "loss": 0.4139,
+ "step": 2909
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014580231036322768,
+ "loss": 0.3307,
+ "step": 2910
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014576874113391789,
+ "loss": 0.4155,
+ "step": 2911
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014573516537898394,
+ "loss": 0.4461,
+ "step": 2912
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014570158310321305,
+ "loss": 0.4775,
+ "step": 2913
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.0001456679943113933,
+ "loss": 0.344,
+ "step": 2914
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014563439900831373,
+ "loss": 0.3568,
+ "step": 2915
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014560079719876424,
+ "loss": 0.3808,
+ "step": 2916
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.0001455671888875358,
+ "loss": 0.5467,
+ "step": 2917
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014553357407942022,
+ "loss": 0.5267,
+ "step": 2918
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014549995277921015,
+ "loss": 0.4476,
+ "step": 2919
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014546632499169937,
+ "loss": 0.4463,
+ "step": 2920
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014543269072168235,
+ "loss": 0.5553,
+ "step": 2921
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014539904997395468,
+ "loss": 0.5476,
+ "step": 2922
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001453654027533128,
+ "loss": 0.4443,
+ "step": 2923
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014533174906455404,
+ "loss": 0.4353,
+ "step": 2924
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014529808891247667,
+ "loss": 0.4479,
+ "step": 2925
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014526442230187995,
+ "loss": 0.3951,
+ "step": 2926
+ },
+ {
+ "epoch": 3.47,
+ "eval_loss": 2.882225751876831,
+ "eval_runtime": 283.9462,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2926
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014523074923756394,
+ "loss": 0.679,
+ "step": 2927
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001451970697243297,
+ "loss": 0.4178,
+ "step": 2928
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001451633837669792,
+ "loss": 0.4121,
+ "step": 2929
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014512969137031538,
+ "loss": 0.3929,
+ "step": 2930
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014509599253914195,
+ "loss": 0.366,
+ "step": 2931
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.0001450622872782637,
+ "loss": 0.3528,
+ "step": 2932
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014502857559248617,
+ "loss": 0.5003,
+ "step": 2933
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014499485748661604,
+ "loss": 0.4901,
+ "step": 2934
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014496113296546067,
+ "loss": 0.4538,
+ "step": 2935
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014492740203382847,
+ "loss": 0.4549,
+ "step": 2936
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.0001448936646965288,
+ "loss": 0.5464,
+ "step": 2937
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014485992095837177,
+ "loss": 0.43,
+ "step": 2938
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014482617082416858,
+ "loss": 0.5893,
+ "step": 2939
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001447924142987312,
+ "loss": 0.4947,
+ "step": 2940
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014475865138687262,
+ "loss": 0.4903,
+ "step": 2941
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001447248820934067,
+ "loss": 0.4933,
+ "step": 2942
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014469110642314817,
+ "loss": 0.4516,
+ "step": 2943
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001446573243809127,
+ "loss": 0.469,
+ "step": 2944
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014462353597151684,
+ "loss": 0.6531,
+ "step": 2945
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014458974119977818,
+ "loss": 0.2754,
+ "step": 2946
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001445559400705151,
+ "loss": 0.5676,
+ "step": 2947
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014452213258854684,
+ "loss": 0.5903,
+ "step": 2948
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014448831875869364,
+ "loss": 0.5022,
+ "step": 2949
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.0001444544985857766,
+ "loss": 0.3509,
+ "step": 2950
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014442067207461775,
+ "loss": 0.3921,
+ "step": 2951
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014438683923004005,
+ "loss": 0.4997,
+ "step": 2952
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014435300005686728,
+ "loss": 0.6218,
+ "step": 2953
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014431915455992414,
+ "loss": 0.4097,
+ "step": 2954
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014428530274403632,
+ "loss": 0.3478,
+ "step": 2955
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014425144461403035,
+ "loss": 0.4506,
+ "step": 2956
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014421758017473362,
+ "loss": 0.4025,
+ "step": 2957
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014418370943097448,
+ "loss": 0.3838,
+ "step": 2958
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014414983238758217,
+ "loss": 0.6366,
+ "step": 2959
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014411594904938682,
+ "loss": 0.4649,
+ "step": 2960
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014408205942121943,
+ "loss": 0.3361,
+ "step": 2961
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014404816350791188,
+ "loss": 0.3692,
+ "step": 2962
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.0001440142613142971,
+ "loss": 0.6162,
+ "step": 2963
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014398035284520874,
+ "loss": 0.5935,
+ "step": 2964
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001439464381054814,
+ "loss": 0.545,
+ "step": 2965
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014391251709995061,
+ "loss": 0.4178,
+ "step": 2966
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014387858983345276,
+ "loss": 0.5552,
+ "step": 2967
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001438446563108251,
+ "loss": 0.4506,
+ "step": 2968
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014381071653690587,
+ "loss": 0.429,
+ "step": 2969
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014377677051653404,
+ "loss": 0.3897,
+ "step": 2970
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001437428182545497,
+ "loss": 0.4663,
+ "step": 2971
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014370885975579364,
+ "loss": 0.4643,
+ "step": 2972
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001436748950251076,
+ "loss": 0.5433,
+ "step": 2973
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001436409240673342,
+ "loss": 0.4967,
+ "step": 2974
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.000143606946887317,
+ "loss": 0.3717,
+ "step": 2975
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014357296348990037,
+ "loss": 0.4166,
+ "step": 2976
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001435389738799296,
+ "loss": 0.455,
+ "step": 2977
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014350497806225087,
+ "loss": 0.4603,
+ "step": 2978
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014347097604171127,
+ "loss": 0.4325,
+ "step": 2979
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001434369678231587,
+ "loss": 0.4375,
+ "step": 2980
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014340295341144202,
+ "loss": 0.4932,
+ "step": 2981
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014336893281141096,
+ "loss": 0.5264,
+ "step": 2982
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014333490602791608,
+ "loss": 0.4677,
+ "step": 2983
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014330087306580887,
+ "loss": 0.6505,
+ "step": 2984
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014326683392994167,
+ "loss": 0.4451,
+ "step": 2985
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014323278862516775,
+ "loss": 0.4025,
+ "step": 2986
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.0001431987371563412,
+ "loss": 0.5084,
+ "step": 2987
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.000143164679528317,
+ "loss": 0.4806,
+ "step": 2988
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014313061574595115,
+ "loss": 0.3954,
+ "step": 2989
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014309654581410024,
+ "loss": 0.4339,
+ "step": 2990
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.000143062469737622,
+ "loss": 0.6739,
+ "step": 2991
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014302838752137487,
+ "loss": 0.6414,
+ "step": 2992
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014299429917021827,
+ "loss": 0.5075,
+ "step": 2993
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014296020468901246,
+ "loss": 0.4105,
+ "step": 2994
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014292610408261856,
+ "loss": 0.7371,
+ "step": 2995
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014289199735589852,
+ "loss": 0.7485,
+ "step": 2996
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014285788451371534,
+ "loss": 0.7629,
+ "step": 2997
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014282376556093264,
+ "loss": 0.3849,
+ "step": 2998
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014278964050241512,
+ "loss": 0.5355,
+ "step": 2999
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014275550934302823,
+ "loss": 0.4077,
+ "step": 3000
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014272137208763832,
+ "loss": 0.5352,
+ "step": 3001
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014268722874111265,
+ "loss": 0.5257,
+ "step": 3002
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014265307930831932,
+ "loss": 0.4265,
+ "step": 3003
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014261892379412728,
+ "loss": 0.5776,
+ "step": 3004
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.0001425847622034063,
+ "loss": 0.3521,
+ "step": 3005
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014255059454102722,
+ "loss": 0.6203,
+ "step": 3006
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014251642081186146,
+ "loss": 0.5238,
+ "step": 3007
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014248224102078152,
+ "loss": 0.3887,
+ "step": 3008
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014244805517266067,
+ "loss": 0.5001,
+ "step": 3009
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001424138632723731,
+ "loss": 0.555,
+ "step": 3010
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001423796653247938,
+ "loss": 0.6137,
+ "step": 3011
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014234546133479867,
+ "loss": 0.8052,
+ "step": 3012
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001423112513072644,
+ "loss": 0.5392,
+ "step": 3013
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014227703524706867,
+ "loss": 0.5067,
+ "step": 3014
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.0001422428131590899,
+ "loss": 0.4016,
+ "step": 3015
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014220858504820742,
+ "loss": 0.4165,
+ "step": 3016
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014217435091930141,
+ "loss": 0.7395,
+ "step": 3017
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014214011077725292,
+ "loss": 0.4985,
+ "step": 3018
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014210586462694384,
+ "loss": 0.4821,
+ "step": 3019
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014207161247325691,
+ "loss": 0.6046,
+ "step": 3020
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014203735432107576,
+ "loss": 0.568,
+ "step": 3021
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014200309017528486,
+ "loss": 0.7383,
+ "step": 3022
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.0001419688200407695,
+ "loss": 0.5296,
+ "step": 3023
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014193454392241592,
+ "loss": 0.6391,
+ "step": 3024
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014190026182511102,
+ "loss": 0.4523,
+ "step": 3025
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.0001418659737537428,
+ "loss": 0.482,
+ "step": 3026
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014183167971319998,
+ "loss": 0.4519,
+ "step": 3027
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014179737970837207,
+ "loss": 0.4156,
+ "step": 3028
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014176307374414956,
+ "loss": 0.5142,
+ "step": 3029
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014172876182542372,
+ "loss": 0.4068,
+ "step": 3030
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014169444395708666,
+ "loss": 0.5908,
+ "step": 3031
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.0001416601201440314,
+ "loss": 0.511,
+ "step": 3032
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014162579039115174,
+ "loss": 0.5165,
+ "step": 3033
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014159145470334235,
+ "loss": 0.4449,
+ "step": 3034
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014155711308549878,
+ "loss": 0.4808,
+ "step": 3035
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014152276554251736,
+ "loss": 0.5365,
+ "step": 3036
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014148841207929527,
+ "loss": 0.6016,
+ "step": 3037
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.0001414540527007307,
+ "loss": 0.379,
+ "step": 3038
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014141968741172238,
+ "loss": 0.6687,
+ "step": 3039
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014138531621717018,
+ "loss": 0.6219,
+ "step": 3040
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001413509391219746,
+ "loss": 0.3408,
+ "step": 3041
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014131655613103708,
+ "loss": 0.5148,
+ "step": 3042
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001412821672492599,
+ "loss": 0.3811,
+ "step": 3043
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001412477724815462,
+ "loss": 0.4691,
+ "step": 3044
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014121337183279988,
+ "loss": 0.6919,
+ "step": 3045
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001411789653079257,
+ "loss": 0.5804,
+ "step": 3046
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014114455291182933,
+ "loss": 0.418,
+ "step": 3047
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001411101346494172,
+ "loss": 0.4422,
+ "step": 3048
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410757105255966,
+ "loss": 0.389,
+ "step": 3049
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410412805452757,
+ "loss": 0.4083,
+ "step": 3050
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410068447133634,
+ "loss": 0.8703,
+ "step": 3051
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014097240303476954,
+ "loss": 0.4724,
+ "step": 3052
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014093795551440474,
+ "loss": 0.6257,
+ "step": 3053
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014090350215718048,
+ "loss": 0.5212,
+ "step": 3054
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014086904296800902,
+ "loss": 0.4429,
+ "step": 3055
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014083457795180355,
+ "loss": 0.3496,
+ "step": 3056
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014080010711347798,
+ "loss": 0.3402,
+ "step": 3057
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.0001407656304579471,
+ "loss": 0.4783,
+ "step": 3058
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014073114799012653,
+ "loss": 0.3987,
+ "step": 3059
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014069665971493274,
+ "loss": 0.4755,
+ "step": 3060
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014066216563728303,
+ "loss": 0.4792,
+ "step": 3061
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014062766576209546,
+ "loss": 0.4275,
+ "step": 3062
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014059316009428893,
+ "loss": 0.3598,
+ "step": 3063
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014055864863878325,
+ "loss": 0.4887,
+ "step": 3064
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.000140524131400499,
+ "loss": 0.5421,
+ "step": 3065
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014048960838435753,
+ "loss": 0.352,
+ "step": 3066
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014045507959528118,
+ "loss": 0.3124,
+ "step": 3067
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014042054503819287,
+ "loss": 0.3955,
+ "step": 3068
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014038600471801658,
+ "loss": 0.455,
+ "step": 3069
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014035145863967692,
+ "loss": 0.5177,
+ "step": 3070
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014031690680809945,
+ "loss": 0.4205,
+ "step": 3071
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014028234922821054,
+ "loss": 0.4832,
+ "step": 3072
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001402477859049373,
+ "loss": 0.3496,
+ "step": 3073
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001402132168432077,
+ "loss": 0.5404,
+ "step": 3074
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014017864204795058,
+ "loss": 0.5106,
+ "step": 3075
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001401440615240955,
+ "loss": 0.6611,
+ "step": 3076
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014010947527657295,
+ "loss": 0.3879,
+ "step": 3077
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001400748833103141,
+ "loss": 0.3054,
+ "step": 3078
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014004028563025108,
+ "loss": 0.3461,
+ "step": 3079
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001400056822413167,
+ "loss": 0.482,
+ "step": 3080
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.0001399710731484447,
+ "loss": 0.3285,
+ "step": 3081
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013993645835656953,
+ "loss": 0.363,
+ "step": 3082
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013990183787062661,
+ "loss": 0.5092,
+ "step": 3083
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013986721169555194,
+ "loss": 0.3009,
+ "step": 3084
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013983257983628253,
+ "loss": 0.3831,
+ "step": 3085
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.0001397979422977561,
+ "loss": 0.3718,
+ "step": 3086
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013976329908491118,
+ "loss": 0.3401,
+ "step": 3087
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013972865020268722,
+ "loss": 0.5294,
+ "step": 3088
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013969399565602435,
+ "loss": 0.5054,
+ "step": 3089
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001396593354498635,
+ "loss": 0.4247,
+ "step": 3090
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013962466958914658,
+ "loss": 0.431,
+ "step": 3091
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013958999807881604,
+ "loss": 0.6341,
+ "step": 3092
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001395553209238154,
+ "loss": 0.5126,
+ "step": 3093
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013952063812908881,
+ "loss": 0.3775,
+ "step": 3094
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001394859496995813,
+ "loss": 0.5149,
+ "step": 3095
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013945125564023868,
+ "loss": 0.2879,
+ "step": 3096
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013941655595600756,
+ "loss": 0.5621,
+ "step": 3097
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013938185065183532,
+ "loss": 0.408,
+ "step": 3098
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013934713973267024,
+ "loss": 0.4247,
+ "step": 3099
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001393124232034613,
+ "loss": 0.4224,
+ "step": 3100
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001392777010691584,
+ "loss": 0.4142,
+ "step": 3101
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013924297333471204,
+ "loss": 0.6004,
+ "step": 3102
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013920824000507374,
+ "loss": 0.6016,
+ "step": 3103
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001391735010851956,
+ "loss": 0.4669,
+ "step": 3104
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013913875658003074,
+ "loss": 0.3987,
+ "step": 3105
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001391040064945329,
+ "loss": 0.471,
+ "step": 3106
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001390692508336568,
+ "loss": 0.6135,
+ "step": 3107
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013903448960235766,
+ "loss": 0.5369,
+ "step": 3108
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013899972280559183,
+ "loss": 0.3295,
+ "step": 3109
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001389649504483162,
+ "loss": 0.309,
+ "step": 3110
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013893017253548858,
+ "loss": 0.4026,
+ "step": 3111
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013889538907206755,
+ "loss": 0.4724,
+ "step": 3112
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001388606000630125,
+ "loss": 0.3606,
+ "step": 3113
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001388258055132835,
+ "loss": 0.4894,
+ "step": 3114
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001387910054278416,
+ "loss": 0.4832,
+ "step": 3115
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001387561998116484,
+ "loss": 0.4604,
+ "step": 3116
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013872138866966656,
+ "loss": 0.4377,
+ "step": 3117
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013868657200685934,
+ "loss": 0.3965,
+ "step": 3118
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001386517498281908,
+ "loss": 0.7653,
+ "step": 3119
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013861692213862584,
+ "loss": 0.5213,
+ "step": 3120
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013858208894313017,
+ "loss": 0.9296,
+ "step": 3121
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013854725024667016,
+ "loss": 0.7738,
+ "step": 3122
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013851240605421315,
+ "loss": 0.5826,
+ "step": 3123
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.0001384775563707271,
+ "loss": 0.5502,
+ "step": 3124
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013844270120118085,
+ "loss": 0.3535,
+ "step": 3125
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.0001384078405505439,
+ "loss": 0.4853,
+ "step": 3126
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013837297442378675,
+ "loss": 0.5819,
+ "step": 3127
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013833810282588044,
+ "loss": 0.3728,
+ "step": 3128
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013830322576179697,
+ "loss": 0.3327,
+ "step": 3129
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.000138268343236509,
+ "loss": 0.4618,
+ "step": 3130
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013823345525499004,
+ "loss": 0.3377,
+ "step": 3131
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013819856182221434,
+ "loss": 0.3154,
+ "step": 3132
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013816366294315695,
+ "loss": 0.5116,
+ "step": 3133
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.0001381287586227937,
+ "loss": 0.4987,
+ "step": 3134
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013809384886610118,
+ "loss": 0.5596,
+ "step": 3135
+ },
+ {
+ "epoch": 3.72,
+ "eval_loss": 2.939779281616211,
+ "eval_runtime": 283.9953,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 3135
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013805893367805678,
+ "loss": 0.5128,
+ "step": 3136
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.0001380240130636386,
+ "loss": 0.3149,
+ "step": 3137
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013798908702782558,
+ "loss": 0.4984,
+ "step": 3138
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.0001379541555755974,
+ "loss": 0.626,
+ "step": 3139
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013791921871193457,
+ "loss": 0.4949,
+ "step": 3140
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013788427644181823,
+ "loss": 0.5654,
+ "step": 3141
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.0001378493287702305,
+ "loss": 0.4197,
+ "step": 3142
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013781437570215406,
+ "loss": 0.4341,
+ "step": 3143
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013777941724257253,
+ "loss": 0.3576,
+ "step": 3144
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013774445339647014,
+ "loss": 0.3098,
+ "step": 3145
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013770948416883205,
+ "loss": 0.6052,
+ "step": 3146
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013767450956464407,
+ "loss": 0.4327,
+ "step": 3147
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013763952958889287,
+ "loss": 0.4717,
+ "step": 3148
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.0001376045442465657,
+ "loss": 0.5263,
+ "step": 3149
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013756955354265085,
+ "loss": 0.5021,
+ "step": 3150
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013753455748213714,
+ "loss": 0.4066,
+ "step": 3151
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013749955607001433,
+ "loss": 0.3461,
+ "step": 3152
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013746454931127278,
+ "loss": 0.4318,
+ "step": 3153
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013742953721090372,
+ "loss": 0.4195,
+ "step": 3154
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.0001373945197738991,
+ "loss": 0.3862,
+ "step": 3155
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013735949700525163,
+ "loss": 0.5916,
+ "step": 3156
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013732446890995484,
+ "loss": 0.5336,
+ "step": 3157
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013728943549300295,
+ "loss": 0.4104,
+ "step": 3158
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013725439675939095,
+ "loss": 0.541,
+ "step": 3159
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013721935271411464,
+ "loss": 0.5173,
+ "step": 3160
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013718430336217045,
+ "loss": 0.3866,
+ "step": 3161
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013714924870855571,
+ "loss": 0.6113,
+ "step": 3162
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013711418875826846,
+ "loss": 0.5817,
+ "step": 3163
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001370791235163075,
+ "loss": 0.5331,
+ "step": 3164
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013704405298767229,
+ "loss": 0.5744,
+ "step": 3165
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001370089771773632,
+ "loss": 0.494,
+ "step": 3166
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013697389609038124,
+ "loss": 0.4537,
+ "step": 3167
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013693880973172822,
+ "loss": 0.5494,
+ "step": 3168
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013690371810640665,
+ "loss": 0.537,
+ "step": 3169
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001368686212194199,
+ "loss": 0.4698,
+ "step": 3170
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013683351907577194,
+ "loss": 0.5254,
+ "step": 3171
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013679841168046767,
+ "loss": 0.3857,
+ "step": 3172
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013676329903851254,
+ "loss": 0.4464,
+ "step": 3173
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.0001367281811549129,
+ "loss": 0.5651,
+ "step": 3174
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.0001366930580346758,
+ "loss": 0.4192,
+ "step": 3175
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.000136657929682809,
+ "loss": 0.3364,
+ "step": 3176
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013662279610432104,
+ "loss": 0.3539,
+ "step": 3177
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013658765730422125,
+ "loss": 0.6074,
+ "step": 3178
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013655251328751957,
+ "loss": 0.5322,
+ "step": 3179
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013651736405922686,
+ "loss": 0.4176,
+ "step": 3180
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013648220962435458,
+ "loss": 0.4878,
+ "step": 3181
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.000136447049987915,
+ "loss": 0.6351,
+ "step": 3182
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013641188515492109,
+ "loss": 0.4487,
+ "step": 3183
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001363767151303866,
+ "loss": 0.4451,
+ "step": 3184
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013634153991932607,
+ "loss": 0.4944,
+ "step": 3185
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001363063595267547,
+ "loss": 0.5932,
+ "step": 3186
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013627117395768833,
+ "loss": 0.4964,
+ "step": 3187
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001362359832171438,
+ "loss": 0.6795,
+ "step": 3188
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013620078731013845,
+ "loss": 0.3862,
+ "step": 3189
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001361655862416905,
+ "loss": 0.3425,
+ "step": 3190
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001361303800168188,
+ "loss": 0.4361,
+ "step": 3191
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001360951686405431,
+ "loss": 0.5774,
+ "step": 3192
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013605995211788365,
+ "loss": 0.4044,
+ "step": 3193
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013602473045386165,
+ "loss": 0.3858,
+ "step": 3194
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013598950365349883,
+ "loss": 0.6136,
+ "step": 3195
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013595427172181785,
+ "loss": 0.329,
+ "step": 3196
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013591903466384203,
+ "loss": 0.3898,
+ "step": 3197
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013588379248459536,
+ "loss": 0.4809,
+ "step": 3198
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013584854518910262,
+ "loss": 0.4108,
+ "step": 3199
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013581329278238927,
+ "loss": 0.4655,
+ "step": 3200
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013577803526948162,
+ "loss": 0.4657,
+ "step": 3201
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013574277265540654,
+ "loss": 0.4842,
+ "step": 3202
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013570750494519175,
+ "loss": 0.4593,
+ "step": 3203
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013567223214386564,
+ "loss": 0.435,
+ "step": 3204
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013563695425645737,
+ "loss": 0.7146,
+ "step": 3205
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013560167128799674,
+ "loss": 0.5027,
+ "step": 3206
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013556638324351442,
+ "loss": 0.4844,
+ "step": 3207
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013553109012804163,
+ "loss": 0.7605,
+ "step": 3208
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013549579194661044,
+ "loss": 0.396,
+ "step": 3209
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013546048870425356,
+ "loss": 0.5178,
+ "step": 3210
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013542518040600453,
+ "loss": 0.6946,
+ "step": 3211
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.0001353898670568975,
+ "loss": 0.5054,
+ "step": 3212
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013535454866196739,
+ "loss": 0.4495,
+ "step": 3213
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013531922522624982,
+ "loss": 0.5138,
+ "step": 3214
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001352838967547812,
+ "loss": 0.4706,
+ "step": 3215
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013524856325259848,
+ "loss": 0.5193,
+ "step": 3216
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001352132247247396,
+ "loss": 0.4436,
+ "step": 3217
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013517788117624292,
+ "loss": 0.4139,
+ "step": 3218
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001351425326121478,
+ "loss": 0.5937,
+ "step": 3219
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.000135107179037494,
+ "loss": 0.3375,
+ "step": 3220
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013507182045732234,
+ "loss": 0.3712,
+ "step": 3221
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013503645687667408,
+ "loss": 0.3424,
+ "step": 3222
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013500108830059133,
+ "loss": 0.3333,
+ "step": 3223
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013496571473411688,
+ "loss": 0.4042,
+ "step": 3224
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013493033618229417,
+ "loss": 0.4963,
+ "step": 3225
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.0001348949526501675,
+ "loss": 0.3946,
+ "step": 3226
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013485956414278178,
+ "loss": 0.5807,
+ "step": 3227
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013482417066518256,
+ "loss": 0.4561,
+ "step": 3228
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013478877222241627,
+ "loss": 0.4964,
+ "step": 3229
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013475336881952986,
+ "loss": 0.6429,
+ "step": 3230
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013471796046157116,
+ "loss": 0.5466,
+ "step": 3231
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013468254715358861,
+ "loss": 0.3882,
+ "step": 3232
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013464712890063138,
+ "loss": 0.5006,
+ "step": 3233
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.0001346117057077493,
+ "loss": 0.494,
+ "step": 3234
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013457627757999303,
+ "loss": 0.5444,
+ "step": 3235
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013454084452241372,
+ "loss": 0.3714,
+ "step": 3236
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013450540654006348,
+ "loss": 0.3335,
+ "step": 3237
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.0001344699636379949,
+ "loss": 0.4771,
+ "step": 3238
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013443451582126144,
+ "loss": 0.466,
+ "step": 3239
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013439906309491712,
+ "loss": 0.5537,
+ "step": 3240
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013436360546401676,
+ "loss": 0.5899,
+ "step": 3241
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013432814293361584,
+ "loss": 0.443,
+ "step": 3242
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013429267550877055,
+ "loss": 0.4238,
+ "step": 3243
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013425720319453773,
+ "loss": 0.6529,
+ "step": 3244
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013422172599597505,
+ "loss": 0.6163,
+ "step": 3245
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013418624391814068,
+ "loss": 0.5183,
+ "step": 3246
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013415075696609363,
+ "loss": 0.7659,
+ "step": 3247
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001341152651448936,
+ "loss": 0.3717,
+ "step": 3248
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340797684596009,
+ "loss": 0.6885,
+ "step": 3249
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340442669152766,
+ "loss": 0.4483,
+ "step": 3250
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340087605169825,
+ "loss": 0.3417,
+ "step": 3251
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013397324926978094,
+ "loss": 0.4751,
+ "step": 3252
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013393773317873508,
+ "loss": 0.4448,
+ "step": 3253
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013390221224890878,
+ "loss": 0.6278,
+ "step": 3254
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013386668648536655,
+ "loss": 0.2995,
+ "step": 3255
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013383115589317353,
+ "loss": 0.535,
+ "step": 3256
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013379562047739568,
+ "loss": 0.4972,
+ "step": 3257
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013376008024309948,
+ "loss": 0.4821,
+ "step": 3258
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.0001337245351953523,
+ "loss": 0.392,
+ "step": 3259
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.000133688985339222,
+ "loss": 0.413,
+ "step": 3260
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013365343067977726,
+ "loss": 0.4689,
+ "step": 3261
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013361787122208744,
+ "loss": 0.4737,
+ "step": 3262
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013358230697122246,
+ "loss": 0.5033,
+ "step": 3263
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013354673793225302,
+ "loss": 0.4901,
+ "step": 3264
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013351116411025054,
+ "loss": 0.5776,
+ "step": 3265
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013347558551028702,
+ "loss": 0.5005,
+ "step": 3266
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013344000213743522,
+ "loss": 0.6475,
+ "step": 3267
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013340441399676856,
+ "loss": 0.4394,
+ "step": 3268
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001333688210933611,
+ "loss": 0.4351,
+ "step": 3269
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001333332234322876,
+ "loss": 0.4526,
+ "step": 3270
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001332976210186236,
+ "loss": 0.3006,
+ "step": 3271
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013326201385744518,
+ "loss": 0.382,
+ "step": 3272
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013322640195382907,
+ "loss": 0.3488,
+ "step": 3273
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013319078531285285,
+ "loss": 0.5538,
+ "step": 3274
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013315516393959463,
+ "loss": 0.5328,
+ "step": 3275
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013311953783913324,
+ "loss": 0.5216,
+ "step": 3276
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001330839070165482,
+ "loss": 0.3845,
+ "step": 3277
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001330482714769197,
+ "loss": 0.5293,
+ "step": 3278
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013301263122532855,
+ "loss": 0.5415,
+ "step": 3279
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001329769862668563,
+ "loss": 0.5309,
+ "step": 3280
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013294133660658516,
+ "loss": 0.4629,
+ "step": 3281
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013290568224959794,
+ "loss": 0.4329,
+ "step": 3282
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013287002320097821,
+ "loss": 0.3973,
+ "step": 3283
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.0001328343594658102,
+ "loss": 0.3417,
+ "step": 3284
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013279869104917873,
+ "loss": 0.4784,
+ "step": 3285
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013276301795616936,
+ "loss": 0.3668,
+ "step": 3286
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.0001327273401918683,
+ "loss": 0.3726,
+ "step": 3287
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013269165776136238,
+ "loss": 0.518,
+ "step": 3288
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013265597066973922,
+ "loss": 0.3864,
+ "step": 3289
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013262027892208694,
+ "loss": 0.4249,
+ "step": 3290
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013258458252349444,
+ "loss": 0.395,
+ "step": 3291
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013254888147905126,
+ "loss": 0.8359,
+ "step": 3292
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013251317579384756,
+ "loss": 0.5028,
+ "step": 3293
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.0001324774654729742,
+ "loss": 0.4216,
+ "step": 3294
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.0001324417505215227,
+ "loss": 0.6145,
+ "step": 3295
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013240603094458522,
+ "loss": 0.6158,
+ "step": 3296
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013237030674725464,
+ "loss": 0.5101,
+ "step": 3297
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001323345779346244,
+ "loss": 0.6933,
+ "step": 3298
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001322988445117886,
+ "loss": 0.4192,
+ "step": 3299
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001322631064838422,
+ "loss": 0.4549,
+ "step": 3300
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013222736385588054,
+ "loss": 0.4947,
+ "step": 3301
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013219161663299982,
+ "loss": 0.5383,
+ "step": 3302
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013215586482029669,
+ "loss": 0.4919,
+ "step": 3303
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001321201084228687,
+ "loss": 0.4603,
+ "step": 3304
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013208434744581385,
+ "loss": 0.3127,
+ "step": 3305
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013204858189423097,
+ "loss": 0.754,
+ "step": 3306
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013201281177321935,
+ "loss": 0.3746,
+ "step": 3307
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013197703708787913,
+ "loss": 0.5576,
+ "step": 3308
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001319412578433109,
+ "loss": 0.4992,
+ "step": 3309
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013190547404461598,
+ "loss": 0.4533,
+ "step": 3310
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001318696856968965,
+ "loss": 0.4155,
+ "step": 3311
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013183389280525497,
+ "loss": 0.3661,
+ "step": 3312
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013179809537479476,
+ "loss": 0.4512,
+ "step": 3313
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013176229341061975,
+ "loss": 0.5895,
+ "step": 3314
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013172648691783454,
+ "loss": 0.3308,
+ "step": 3315
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013169067590154432,
+ "loss": 0.4128,
+ "step": 3316
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013165486036685503,
+ "loss": 0.5432,
+ "step": 3317
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001316190403188731,
+ "loss": 0.4297,
+ "step": 3318
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013158321576270575,
+ "loss": 0.4259,
+ "step": 3319
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001315473867034608,
+ "loss": 0.4428,
+ "step": 3320
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001315115531462466,
+ "loss": 0.6495,
+ "step": 3321
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013147571509617228,
+ "loss": 0.5706,
+ "step": 3322
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001314398725583476,
+ "loss": 0.3647,
+ "step": 3323
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001314040255378829,
+ "loss": 0.4864,
+ "step": 3324
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013136817403988917,
+ "loss": 0.4197,
+ "step": 3325
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013133231806947805,
+ "loss": 0.4818,
+ "step": 3326
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013129645763176184,
+ "loss": 0.4201,
+ "step": 3327
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001312605927318534,
+ "loss": 0.4352,
+ "step": 3328
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001312247233748664,
+ "loss": 0.2785,
+ "step": 3329
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001311888495659149,
+ "loss": 0.4424,
+ "step": 3330
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013115297131011382,
+ "loss": 0.4258,
+ "step": 3331
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013111708861257855,
+ "loss": 0.4332,
+ "step": 3332
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013108120147842519,
+ "loss": 0.3578,
+ "step": 3333
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.0001310453099127705,
+ "loss": 0.4219,
+ "step": 3334
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.0001310094139207318,
+ "loss": 0.5837,
+ "step": 3335
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.0001309735135074271,
+ "loss": 0.3965,
+ "step": 3336
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013093760867797502,
+ "loss": 0.4764,
+ "step": 3337
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013090169943749476,
+ "loss": 0.4933,
+ "step": 3338
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013086578579110623,
+ "loss": 0.3434,
+ "step": 3339
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001308298677439299,
+ "loss": 0.5931,
+ "step": 3340
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013079394530108695,
+ "loss": 0.442,
+ "step": 3341
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001307580184676991,
+ "loss": 0.3229,
+ "step": 3342
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001307220872488888,
+ "loss": 0.4567,
+ "step": 3343
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013068615164977895,
+ "loss": 0.6224,
+ "step": 3344
+ },
+ {
+ "epoch": 3.97,
+ "eval_loss": 2.954587936401367,
+ "eval_runtime": 283.9817,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 3344
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013065021167549322,
+ "loss": 0.6767,
+ "step": 3345
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001306142673311559,
+ "loss": 0.4809,
+ "step": 3346
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013057831862189187,
+ "loss": 0.4563,
+ "step": 3347
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013054236555282657,
+ "loss": 0.4674,
+ "step": 3348
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013050640812908623,
+ "loss": 0.6636,
+ "step": 3349
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013047044635579747,
+ "loss": 0.4652,
+ "step": 3350
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013043448023808773,
+ "loss": 0.3912,
+ "step": 3351
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.000130398509781085,
+ "loss": 0.6064,
+ "step": 3352
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013036253498991787,
+ "loss": 0.5975,
+ "step": 3353
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013032655586971552,
+ "loss": 0.7249,
+ "step": 3354
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013029057242560784,
+ "loss": 0.4604,
+ "step": 3355
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013025458466272525,
+ "loss": 0.4895,
+ "step": 3356
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.0001302185925861988,
+ "loss": 0.3628,
+ "step": 3357
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013018259620116025,
+ "loss": 0.4798,
+ "step": 3358
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013014659551274189,
+ "loss": 0.663,
+ "step": 3359
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013011059052607656,
+ "loss": 0.3923,
+ "step": 3360
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013007458124629785,
+ "loss": 0.5601,
+ "step": 3361
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013003856767853983,
+ "loss": 0.67,
+ "step": 3362
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013000254982793735,
+ "loss": 0.5059,
+ "step": 3363
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012996652769962566,
+ "loss": 0.4992,
+ "step": 3364
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012993050129874082,
+ "loss": 0.6196,
+ "step": 3365
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012989447063041935,
+ "loss": 0.4157,
+ "step": 3366
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012985843569979848,
+ "loss": 0.5714,
+ "step": 3367
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.000129822396512016,
+ "loss": 0.7484,
+ "step": 3368
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012978635307221026,
+ "loss": 0.3928,
+ "step": 3369
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012975030538552032,
+ "loss": 0.4129,
+ "step": 3370
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.0001297142534570858,
+ "loss": 0.5407,
+ "step": 3371
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012967819729204684,
+ "loss": 0.479,
+ "step": 3372
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012964213689554437,
+ "loss": 0.4492,
+ "step": 3373
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012960607227271973,
+ "loss": 0.4574,
+ "step": 3374
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012957000342871502,
+ "loss": 0.7554,
+ "step": 3375
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012953393036867282,
+ "loss": 0.3038,
+ "step": 3376
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001294978530977364,
+ "loss": 0.5125,
+ "step": 3377
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001294617716210495,
+ "loss": 0.7192,
+ "step": 3378
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012942568594375667,
+ "loss": 0.4371,
+ "step": 3379
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012938959607100288,
+ "loss": 0.3672,
+ "step": 3380
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012935350200793378,
+ "loss": 0.4752,
+ "step": 3381
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.0001293174037596956,
+ "loss": 0.225,
+ "step": 3382
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012928130133143512,
+ "loss": 0.2106,
+ "step": 3383
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012924519472829978,
+ "loss": 0.213,
+ "step": 3384
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001292090839554376,
+ "loss": 0.2775,
+ "step": 3385
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001291729690179972,
+ "loss": 0.2417,
+ "step": 3386
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001291368499211278,
+ "loss": 0.2212,
+ "step": 3387
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012910072666997912,
+ "loss": 0.2644,
+ "step": 3388
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012906459926970162,
+ "loss": 0.2206,
+ "step": 3389
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012902846772544624,
+ "loss": 0.2238,
+ "step": 3390
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012899233204236455,
+ "loss": 0.2212,
+ "step": 3391
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012895619222560878,
+ "loss": 0.2082,
+ "step": 3392
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012892004828033156,
+ "loss": 0.2896,
+ "step": 3393
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012888390021168636,
+ "loss": 0.2351,
+ "step": 3394
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012884774802482697,
+ "loss": 0.2263,
+ "step": 3395
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.000128811591724908,
+ "loss": 0.2243,
+ "step": 3396
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.0001287754313170845,
+ "loss": 0.2433,
+ "step": 3397
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012873926680651222,
+ "loss": 0.2566,
+ "step": 3398
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012870309819834735,
+ "loss": 0.2537,
+ "step": 3399
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012866692549774682,
+ "loss": 0.298,
+ "step": 3400
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.000128630748709868,
+ "loss": 0.2246,
+ "step": 3401
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012859456783986893,
+ "loss": 0.2179,
+ "step": 3402
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012855838289290821,
+ "loss": 0.2394,
+ "step": 3403
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.0001285221938741451,
+ "loss": 0.2068,
+ "step": 3404
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012848600078873925,
+ "loss": 0.1961,
+ "step": 3405
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012844980364185108,
+ "loss": 0.2719,
+ "step": 3406
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012841360243864147,
+ "loss": 0.2009,
+ "step": 3407
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012837739718427196,
+ "loss": 0.2343,
+ "step": 3408
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012834118788390456,
+ "loss": 0.3161,
+ "step": 3409
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012830497454270205,
+ "loss": 0.1992,
+ "step": 3410
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012826875716582755,
+ "loss": 0.261,
+ "step": 3411
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012823253575844495,
+ "loss": 0.2403,
+ "step": 3412
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012819631032571854,
+ "loss": 0.2271,
+ "step": 3413
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012816008087281332,
+ "loss": 0.2062,
+ "step": 3414
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012812384740489485,
+ "loss": 0.2133,
+ "step": 3415
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012808760992712924,
+ "loss": 0.2372,
+ "step": 3416
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012805136844468309,
+ "loss": 0.2466,
+ "step": 3417
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012801512296272368,
+ "loss": 0.2456,
+ "step": 3418
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012797887348641883,
+ "loss": 0.2171,
+ "step": 3419
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012794262002093697,
+ "loss": 0.3038,
+ "step": 3420
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.000127906362571447,
+ "loss": 0.1868,
+ "step": 3421
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012787010114311844,
+ "loss": 0.2611,
+ "step": 3422
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012783383574112138,
+ "loss": 0.2131,
+ "step": 3423
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.0001277975663706265,
+ "loss": 0.2005,
+ "step": 3424
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.000127761293036805,
+ "loss": 0.2455,
+ "step": 3425
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.0001277250157448287,
+ "loss": 0.2837,
+ "step": 3426
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012768873449986988,
+ "loss": 0.2252,
+ "step": 3427
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012765244930710155,
+ "loss": 0.211,
+ "step": 3428
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012761616017169708,
+ "loss": 0.1831,
+ "step": 3429
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.0001275798670988306,
+ "loss": 0.1985,
+ "step": 3430
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012754357009367665,
+ "loss": 0.2341,
+ "step": 3431
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012750726916141046,
+ "loss": 0.2395,
+ "step": 3432
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012747096430720765,
+ "loss": 0.2183,
+ "step": 3433
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.0001274346555362446,
+ "loss": 0.2698,
+ "step": 3434
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012739834285369807,
+ "loss": 0.2104,
+ "step": 3435
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001273620262647455,
+ "loss": 0.2395,
+ "step": 3436
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012732570577456484,
+ "loss": 0.2218,
+ "step": 3437
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012728938138833462,
+ "loss": 0.2337,
+ "step": 3438
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012725305311123386,
+ "loss": 0.1958,
+ "step": 3439
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001272167209484422,
+ "loss": 0.2767,
+ "step": 3440
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012718038490513984,
+ "loss": 0.2238,
+ "step": 3441
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012714404498650743,
+ "loss": 0.2931,
+ "step": 3442
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012710770119772632,
+ "loss": 0.3166,
+ "step": 3443
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012707135354397836,
+ "loss": 0.1985,
+ "step": 3444
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012703500203044586,
+ "loss": 0.2208,
+ "step": 3445
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.0001269986466623118,
+ "loss": 0.2279,
+ "step": 3446
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012696228744475964,
+ "loss": 0.2656,
+ "step": 3447
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012692592438297341,
+ "loss": 0.2181,
+ "step": 3448
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012688955748213772,
+ "loss": 0.1994,
+ "step": 3449
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.0001268531867474377,
+ "loss": 0.2818,
+ "step": 3450
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012681681218405897,
+ "loss": 0.2277,
+ "step": 3451
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012678043379718782,
+ "loss": 0.2692,
+ "step": 3452
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012674405159201091,
+ "loss": 0.2664,
+ "step": 3453
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012670766557371565,
+ "loss": 0.2008,
+ "step": 3454
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012667127574748986,
+ "loss": 0.2382,
+ "step": 3455
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.0001266348821185219,
+ "loss": 0.2454,
+ "step": 3456
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.0001265984846920008,
+ "loss": 0.3547,
+ "step": 3457
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012656208347311594,
+ "loss": 0.2115,
+ "step": 3458
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012652567846705736,
+ "loss": 0.1929,
+ "step": 3459
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012648926967901567,
+ "loss": 0.2076,
+ "step": 3460
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012645285711418194,
+ "loss": 0.2045,
+ "step": 3461
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012641644077774776,
+ "loss": 0.2378,
+ "step": 3462
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001263800206749054,
+ "loss": 0.2674,
+ "step": 3463
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012634359681084752,
+ "loss": 0.2125,
+ "step": 3464
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012630716919076736,
+ "loss": 0.2097,
+ "step": 3465
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001262707378198587,
+ "loss": 0.2352,
+ "step": 3466
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001262343027033159,
+ "loss": 0.2105,
+ "step": 3467
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012619786384633375,
+ "loss": 0.2207,
+ "step": 3468
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.0001261614212541077,
+ "loss": 0.304,
+ "step": 3469
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012612497493183364,
+ "loss": 0.2239,
+ "step": 3470
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012608852488470802,
+ "loss": 0.2875,
+ "step": 3471
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.0001260520711179278,
+ "loss": 0.3197,
+ "step": 3472
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012601561363669058,
+ "loss": 0.1942,
+ "step": 3473
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012597915244619428,
+ "loss": 0.2117,
+ "step": 3474
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012594268755163754,
+ "loss": 0.2222,
+ "step": 3475
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012590621895821943,
+ "loss": 0.1871,
+ "step": 3476
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.0001258697466711396,
+ "loss": 0.2146,
+ "step": 3477
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.0001258332706955982,
+ "loss": 0.3307,
+ "step": 3478
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012579679103679592,
+ "loss": 0.2175,
+ "step": 3479
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012576030769993393,
+ "loss": 0.2976,
+ "step": 3480
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.000125723820690214,
+ "loss": 0.2031,
+ "step": 3481
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012568733001283827,
+ "loss": 0.2046,
+ "step": 3482
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.0001256508356730097,
+ "loss": 0.2642,
+ "step": 3483
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012561433767593145,
+ "loss": 0.2088,
+ "step": 3484
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001255778360268074,
+ "loss": 0.2458,
+ "step": 3485
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001255413307308418,
+ "loss": 0.2237,
+ "step": 3486
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012550482179323963,
+ "loss": 0.2696,
+ "step": 3487
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012546830921920617,
+ "loss": 0.2078,
+ "step": 3488
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012543179301394744,
+ "loss": 0.2199,
+ "step": 3489
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001253952731826697,
+ "loss": 0.2258,
+ "step": 3490
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012535874973057997,
+ "loss": 0.1981,
+ "step": 3491
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001253222226628857,
+ "loss": 0.3252,
+ "step": 3492
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012528569198479481,
+ "loss": 0.2717,
+ "step": 3493
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.0001252491577015158,
+ "loss": 0.248,
+ "step": 3494
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012521261981825768,
+ "loss": 0.2725,
+ "step": 3495
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012517607834022993,
+ "loss": 0.2203,
+ "step": 3496
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.0001251395332726426,
+ "loss": 0.2461,
+ "step": 3497
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012510298462070619,
+ "loss": 0.3018,
+ "step": 3498
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.0001250664323896317,
+ "loss": 0.2329,
+ "step": 3499
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012502987658463075,
+ "loss": 0.221,
+ "step": 3500
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012499331721091544,
+ "loss": 0.2812,
+ "step": 3501
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012495675427369823,
+ "loss": 0.2846,
+ "step": 3502
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012492018777819226,
+ "loss": 0.2447,
+ "step": 3503
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.0001248836177296111,
+ "loss": 0.1969,
+ "step": 3504
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012484704413316878,
+ "loss": 0.2045,
+ "step": 3505
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012481046699408004,
+ "loss": 0.1862,
+ "step": 3506
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012477388631755985,
+ "loss": 0.23,
+ "step": 3507
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.0001247373021088239,
+ "loss": 0.2972,
+ "step": 3508
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012470071437308827,
+ "loss": 0.2222,
+ "step": 3509
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012466412311556952,
+ "loss": 0.2262,
+ "step": 3510
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012462752834148486,
+ "loss": 0.3642,
+ "step": 3511
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.0001245909300560518,
+ "loss": 0.2221,
+ "step": 3512
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012455432826448862,
+ "loss": 0.2607,
+ "step": 3513
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012451772297201376,
+ "loss": 0.2396,
+ "step": 3514
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012448111418384645,
+ "loss": 0.2034,
+ "step": 3515
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012444450190520623,
+ "loss": 0.2404,
+ "step": 3516
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012440788614131329,
+ "loss": 0.2029,
+ "step": 3517
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012437126689738816,
+ "loss": 0.2128,
+ "step": 3518
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012433464417865202,
+ "loss": 0.2857,
+ "step": 3519
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.0001242980179903264,
+ "loss": 0.2931,
+ "step": 3520
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012426138833763342,
+ "loss": 0.2319,
+ "step": 3521
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012422475522579573,
+ "loss": 0.2272,
+ "step": 3522
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012418811866003632,
+ "loss": 0.2498,
+ "step": 3523
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012415147864557884,
+ "loss": 0.1993,
+ "step": 3524
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.0001241148351876473,
+ "loss": 0.2329,
+ "step": 3525
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001240781882914663,
+ "loss": 0.2228,
+ "step": 3526
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012404153796226087,
+ "loss": 0.2228,
+ "step": 3527
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012400488420525653,
+ "loss": 0.2277,
+ "step": 3528
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001239682270256793,
+ "loss": 0.2344,
+ "step": 3529
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001239315664287558,
+ "loss": 0.2043,
+ "step": 3530
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001238949024197129,
+ "loss": 0.2143,
+ "step": 3531
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012385823500377812,
+ "loss": 0.2054,
+ "step": 3532
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012382156418617947,
+ "loss": 0.2191,
+ "step": 3533
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001237848899721454,
+ "loss": 0.2199,
+ "step": 3534
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012374821236690482,
+ "loss": 0.1899,
+ "step": 3535
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.0001237115313756872,
+ "loss": 0.2206,
+ "step": 3536
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012367484700372242,
+ "loss": 0.2107,
+ "step": 3537
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012363815925624087,
+ "loss": 0.1904,
+ "step": 3538
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012360146813847345,
+ "loss": 0.2259,
+ "step": 3539
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012356477365565148,
+ "loss": 0.2488,
+ "step": 3540
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012352807581300678,
+ "loss": 0.3026,
+ "step": 3541
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012349137461577174,
+ "loss": 0.2141,
+ "step": 3542
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012345467006917907,
+ "loss": 0.2183,
+ "step": 3543
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012341796217846208,
+ "loss": 0.2978,
+ "step": 3544
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.0001233812509488545,
+ "loss": 0.2255,
+ "step": 3545
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012334453638559057,
+ "loss": 0.2209,
+ "step": 3546
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012330781849390494,
+ "loss": 0.2464,
+ "step": 3547
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012327109727903283,
+ "loss": 0.2259,
+ "step": 3548
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012323437274620983,
+ "loss": 0.209,
+ "step": 3549
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.0001231976449006721,
+ "loss": 0.2424,
+ "step": 3550
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012316091374765624,
+ "loss": 0.2162,
+ "step": 3551
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.0001231241792923993,
+ "loss": 0.2442,
+ "step": 3552
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012308744154013878,
+ "loss": 0.2061,
+ "step": 3553
+ },
+ {
+ "epoch": 4.21,
+ "eval_loss": 3.390720844268799,
+ "eval_runtime": 283.8935,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 3553
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012305070049611273,
+ "loss": 0.1838,
+ "step": 3554
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012301395616555957,
+ "loss": 0.197,
+ "step": 3555
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.0001229772085537183,
+ "loss": 0.2479,
+ "step": 3556
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012294045766582823,
+ "loss": 0.3272,
+ "step": 3557
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012290370350712937,
+ "loss": 0.2301,
+ "step": 3558
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012286694608286197,
+ "loss": 0.2367,
+ "step": 3559
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012283018539826685,
+ "loss": 0.2419,
+ "step": 3560
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.0001227934214585853,
+ "loss": 0.2605,
+ "step": 3561
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.000122756654269059,
+ "loss": 0.2084,
+ "step": 3562
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012271988383493024,
+ "loss": 0.2414,
+ "step": 3563
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012268311016144163,
+ "loss": 0.2206,
+ "step": 3564
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.0001226463332538363,
+ "loss": 0.2012,
+ "step": 3565
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012260955311735786,
+ "loss": 0.1884,
+ "step": 3566
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012257276975725028,
+ "loss": 0.2155,
+ "step": 3567
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.0001225359831787581,
+ "loss": 0.2375,
+ "step": 3568
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012249919338712636,
+ "loss": 0.2713,
+ "step": 3569
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012246240038760043,
+ "loss": 0.2414,
+ "step": 3570
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012242560418542612,
+ "loss": 0.2209,
+ "step": 3571
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012238880478584985,
+ "loss": 0.2318,
+ "step": 3572
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012235200219411836,
+ "loss": 0.2858,
+ "step": 3573
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.000122315196415479,
+ "loss": 0.2468,
+ "step": 3574
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012227838745517932,
+ "loss": 0.2166,
+ "step": 3575
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001222415753184676,
+ "loss": 0.2349,
+ "step": 3576
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012220476001059238,
+ "loss": 0.2486,
+ "step": 3577
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012216794153680274,
+ "loss": 0.234,
+ "step": 3578
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012213111990234815,
+ "loss": 0.2008,
+ "step": 3579
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012209429511247864,
+ "loss": 0.2548,
+ "step": 3580
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001220574671724446,
+ "loss": 0.2562,
+ "step": 3581
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001220206360874969,
+ "loss": 0.2586,
+ "step": 3582
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001219838018628868,
+ "loss": 0.2428,
+ "step": 3583
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012194696450386608,
+ "loss": 0.2159,
+ "step": 3584
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012191012401568698,
+ "loss": 0.2544,
+ "step": 3585
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.0001218732804036021,
+ "loss": 0.2396,
+ "step": 3586
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012183643367286462,
+ "loss": 0.2335,
+ "step": 3587
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012179958382872796,
+ "loss": 0.2275,
+ "step": 3588
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012176273087644619,
+ "loss": 0.2291,
+ "step": 3589
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.0001217258748212737,
+ "loss": 0.2272,
+ "step": 3590
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012168901566846535,
+ "loss": 0.2135,
+ "step": 3591
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012165215342327648,
+ "loss": 0.22,
+ "step": 3592
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012161528809096285,
+ "loss": 0.2577,
+ "step": 3593
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012157841967678063,
+ "loss": 0.2006,
+ "step": 3594
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012154154818598647,
+ "loss": 0.2322,
+ "step": 3595
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.0001215046736238374,
+ "loss": 0.196,
+ "step": 3596
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012146779599559095,
+ "loss": 0.2267,
+ "step": 3597
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012143091530650508,
+ "loss": 0.2416,
+ "step": 3598
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012139403156183817,
+ "loss": 0.2585,
+ "step": 3599
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012135714476684903,
+ "loss": 0.2644,
+ "step": 3600
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012132025492679693,
+ "loss": 0.2355,
+ "step": 3601
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012128336204694148,
+ "loss": 0.2363,
+ "step": 3602
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012124646613254291,
+ "loss": 0.2476,
+ "step": 3603
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.0001212095671888617,
+ "loss": 0.2185,
+ "step": 3604
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012117266522115889,
+ "loss": 0.2233,
+ "step": 3605
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012113576023469582,
+ "loss": 0.2084,
+ "step": 3606
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012109885223473439,
+ "loss": 0.2439,
+ "step": 3607
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012106194122653684,
+ "loss": 0.2409,
+ "step": 3608
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012102502721536595,
+ "loss": 0.2183,
+ "step": 3609
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012098811020648475,
+ "loss": 0.2595,
+ "step": 3610
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012095119020515691,
+ "loss": 0.2135,
+ "step": 3611
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.0001209142672166463,
+ "loss": 0.2125,
+ "step": 3612
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012087734124621742,
+ "loss": 0.2017,
+ "step": 3613
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012084041229913505,
+ "loss": 0.2163,
+ "step": 3614
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012080348038066452,
+ "loss": 0.2198,
+ "step": 3615
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012076654549607145,
+ "loss": 0.2234,
+ "step": 3616
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012072960765062197,
+ "loss": 0.2201,
+ "step": 3617
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012069266684958265,
+ "loss": 0.245,
+ "step": 3618
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012065572309822037,
+ "loss": 0.2067,
+ "step": 3619
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012061877640180255,
+ "loss": 0.2284,
+ "step": 3620
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.000120581826765597,
+ "loss": 0.2323,
+ "step": 3621
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012054487419487188,
+ "loss": 0.2162,
+ "step": 3622
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012050791869489586,
+ "loss": 0.2131,
+ "step": 3623
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012047096027093798,
+ "loss": 0.2168,
+ "step": 3624
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012043399892826768,
+ "loss": 0.2293,
+ "step": 3625
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012039703467215488,
+ "loss": 0.2202,
+ "step": 3626
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012036006750786985,
+ "loss": 0.2288,
+ "step": 3627
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012032309744068334,
+ "loss": 0.2606,
+ "step": 3628
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012028612447586643,
+ "loss": 0.2754,
+ "step": 3629
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012024914861869063,
+ "loss": 0.239,
+ "step": 3630
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012021216987442798,
+ "loss": 0.2312,
+ "step": 3631
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012017518824835077,
+ "loss": 0.2299,
+ "step": 3632
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012013820374573184,
+ "loss": 0.2214,
+ "step": 3633
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012010121637184428,
+ "loss": 0.2492,
+ "step": 3634
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00012006422613196178,
+ "loss": 0.2659,
+ "step": 3635
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00012002723303135826,
+ "loss": 0.23,
+ "step": 3636
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011999023707530819,
+ "loss": 0.287,
+ "step": 3637
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011995323826908635,
+ "loss": 0.2204,
+ "step": 3638
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011991623661796798,
+ "loss": 0.2277,
+ "step": 3639
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011987923212722872,
+ "loss": 0.2436,
+ "step": 3640
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011984222480214456,
+ "loss": 0.2074,
+ "step": 3641
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011980521464799198,
+ "loss": 0.2212,
+ "step": 3642
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011976820167004779,
+ "loss": 0.2147,
+ "step": 3643
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011973118587358928,
+ "loss": 0.2271,
+ "step": 3644
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011969416726389404,
+ "loss": 0.2498,
+ "step": 3645
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011965714584624012,
+ "loss": 0.2171,
+ "step": 3646
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011962012162590601,
+ "loss": 0.2276,
+ "step": 3647
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011958309460817052,
+ "loss": 0.2089,
+ "step": 3648
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011954606479831291,
+ "loss": 0.2691,
+ "step": 3649
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011950903220161285,
+ "loss": 0.2229,
+ "step": 3650
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011947199682335031,
+ "loss": 0.2315,
+ "step": 3651
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001194349586688058,
+ "loss": 0.2208,
+ "step": 3652
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001193979177432601,
+ "loss": 0.2159,
+ "step": 3653
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011936087405199446,
+ "loss": 0.2781,
+ "step": 3654
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011932382760029049,
+ "loss": 0.2142,
+ "step": 3655
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011928677839343026,
+ "loss": 0.2275,
+ "step": 3656
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001192497264366961,
+ "loss": 0.2718,
+ "step": 3657
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011921267173537086,
+ "loss": 0.1947,
+ "step": 3658
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011917561429473771,
+ "loss": 0.2361,
+ "step": 3659
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011913855412008023,
+ "loss": 0.1999,
+ "step": 3660
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011910149121668241,
+ "loss": 0.2199,
+ "step": 3661
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011906442558982865,
+ "loss": 0.2217,
+ "step": 3662
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.0001190273572448036,
+ "loss": 0.2263,
+ "step": 3663
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011899028618689247,
+ "loss": 0.2216,
+ "step": 3664
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011895321242138075,
+ "loss": 0.2298,
+ "step": 3665
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.0001189161359535544,
+ "loss": 0.2332,
+ "step": 3666
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011887905678869966,
+ "loss": 0.2955,
+ "step": 3667
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011884197493210328,
+ "loss": 0.2352,
+ "step": 3668
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011880489038905223,
+ "loss": 0.2104,
+ "step": 3669
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011876780316483401,
+ "loss": 0.2897,
+ "step": 3670
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011873071326473644,
+ "loss": 0.2041,
+ "step": 3671
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011869362069404775,
+ "loss": 0.2242,
+ "step": 3672
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.0001186565254580565,
+ "loss": 0.2015,
+ "step": 3673
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011861942756205169,
+ "loss": 0.2716,
+ "step": 3674
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011858232701132264,
+ "loss": 0.2504,
+ "step": 3675
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011854522381115908,
+ "loss": 0.1846,
+ "step": 3676
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011850811796685117,
+ "loss": 0.207,
+ "step": 3677
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011847100948368937,
+ "loss": 0.2228,
+ "step": 3678
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011843389836696447,
+ "loss": 0.2365,
+ "step": 3679
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011839678462196784,
+ "loss": 0.2159,
+ "step": 3680
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011835966825399096,
+ "loss": 0.2413,
+ "step": 3681
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011832254926832586,
+ "loss": 0.2596,
+ "step": 3682
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011828542767026493,
+ "loss": 0.2041,
+ "step": 3683
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011824830346510089,
+ "loss": 0.2512,
+ "step": 3684
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011821117665812682,
+ "loss": 0.2165,
+ "step": 3685
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011817404725463618,
+ "loss": 0.2125,
+ "step": 3686
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011813691525992286,
+ "loss": 0.2557,
+ "step": 3687
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011809978067928102,
+ "loss": 0.2088,
+ "step": 3688
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011806264351800526,
+ "loss": 0.2093,
+ "step": 3689
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.0001180255037813906,
+ "loss": 0.2217,
+ "step": 3690
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011798836147473225,
+ "loss": 0.2681,
+ "step": 3691
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011795121660332593,
+ "loss": 0.2257,
+ "step": 3692
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.0001179140691724677,
+ "loss": 0.2422,
+ "step": 3693
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011787691918745396,
+ "loss": 0.3328,
+ "step": 3694
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.0001178397666535815,
+ "loss": 0.233,
+ "step": 3695
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011780261157614747,
+ "loss": 0.243,
+ "step": 3696
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011776545396044936,
+ "loss": 0.2089,
+ "step": 3697
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011772829381178502,
+ "loss": 0.2143,
+ "step": 3698
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011769113113545267,
+ "loss": 0.2135,
+ "step": 3699
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011765396593675097,
+ "loss": 0.2403,
+ "step": 3700
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011761679822097877,
+ "loss": 0.2182,
+ "step": 3701
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011757962799343547,
+ "loss": 0.2159,
+ "step": 3702
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011754245525942065,
+ "loss": 0.2098,
+ "step": 3703
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011750528002423437,
+ "loss": 0.2264,
+ "step": 3704
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.000117468102293177,
+ "loss": 0.2023,
+ "step": 3705
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011743092207154929,
+ "loss": 0.2978,
+ "step": 3706
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.0001173937393646523,
+ "loss": 0.2311,
+ "step": 3707
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.0001173565541777875,
+ "loss": 0.244,
+ "step": 3708
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011731936651625668,
+ "loss": 0.2058,
+ "step": 3709
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011728217638536197,
+ "loss": 0.3039,
+ "step": 3710
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011724498379040587,
+ "loss": 0.2142,
+ "step": 3711
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.0001172077887366913,
+ "loss": 0.2262,
+ "step": 3712
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011717059122952136,
+ "loss": 0.2304,
+ "step": 3713
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011713339127419969,
+ "loss": 0.2093,
+ "step": 3714
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011709618887603014,
+ "loss": 0.2083,
+ "step": 3715
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011705898404031697,
+ "loss": 0.3559,
+ "step": 3716
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011702177677236479,
+ "loss": 0.2728,
+ "step": 3717
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011698456707747854,
+ "loss": 0.246,
+ "step": 3718
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011694735496096354,
+ "loss": 0.2031,
+ "step": 3719
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011691014042812536,
+ "loss": 0.2049,
+ "step": 3720
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011687292348427004,
+ "loss": 0.248,
+ "step": 3721
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011683570413470383,
+ "loss": 0.2189,
+ "step": 3722
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011679848238473352,
+ "loss": 0.2302,
+ "step": 3723
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011676125823966602,
+ "loss": 0.2839,
+ "step": 3724
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011672403170480872,
+ "loss": 0.2359,
+ "step": 3725
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011668680278546929,
+ "loss": 0.2288,
+ "step": 3726
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.0001166495714869558,
+ "loss": 0.2718,
+ "step": 3727
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011661233781457654,
+ "loss": 0.1967,
+ "step": 3728
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011657510177364032,
+ "loss": 0.2098,
+ "step": 3729
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011653786336945614,
+ "loss": 0.2466,
+ "step": 3730
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011650062260733339,
+ "loss": 0.2207,
+ "step": 3731
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011646337949258175,
+ "loss": 0.2124,
+ "step": 3732
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011642613403051133,
+ "loss": 0.213,
+ "step": 3733
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011638888622643249,
+ "loss": 0.2276,
+ "step": 3734
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.000116351636085656,
+ "loss": 0.2206,
+ "step": 3735
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011631438361349287,
+ "loss": 0.2382,
+ "step": 3736
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011627712881525452,
+ "loss": 0.2264,
+ "step": 3737
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011623987169625261,
+ "loss": 0.2392,
+ "step": 3738
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011620261226179927,
+ "loss": 0.2139,
+ "step": 3739
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011616535051720685,
+ "loss": 0.2103,
+ "step": 3740
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011612808646778806,
+ "loss": 0.211,
+ "step": 3741
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011609082011885592,
+ "loss": 0.2227,
+ "step": 3742
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011605355147572387,
+ "loss": 0.2459,
+ "step": 3743
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011601628054370553,
+ "loss": 0.2312,
+ "step": 3744
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011597900732811496,
+ "loss": 0.2244,
+ "step": 3745
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011594173183426647,
+ "loss": 0.2168,
+ "step": 3746
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011590445406747479,
+ "loss": 0.2711,
+ "step": 3747
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011586717403305487,
+ "loss": 0.1865,
+ "step": 3748
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011582989173632206,
+ "loss": 0.3104,
+ "step": 3749
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011579260718259197,
+ "loss": 0.2245,
+ "step": 3750
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011575532037718057,
+ "loss": 0.2316,
+ "step": 3751
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011571803132540418,
+ "loss": 0.2328,
+ "step": 3752
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011568074003257938,
+ "loss": 0.267,
+ "step": 3753
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.0001156434465040231,
+ "loss": 0.2131,
+ "step": 3754
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.0001156061507450526,
+ "loss": 0.1945,
+ "step": 3755
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011556885276098536,
+ "loss": 0.2344,
+ "step": 3756
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011553155255713937,
+ "loss": 0.2221,
+ "step": 3757
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011549425013883275,
+ "loss": 0.2098,
+ "step": 3758
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011545694551138409,
+ "loss": 0.2329,
+ "step": 3759
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011541963868011212,
+ "loss": 0.2187,
+ "step": 3760
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011538232965033601,
+ "loss": 0.1928,
+ "step": 3761
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011534501842737527,
+ "loss": 0.2103,
+ "step": 3762
+ },
+ {
+ "epoch": 4.46,
+ "eval_loss": 3.44382643699646,
+ "eval_runtime": 283.899,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 3762
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011530770501654959,
+ "loss": 0.2563,
+ "step": 3763
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011527038942317911,
+ "loss": 0.1922,
+ "step": 3764
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011523307165258419,
+ "loss": 0.2246,
+ "step": 3765
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011519575171008552,
+ "loss": 0.2243,
+ "step": 3766
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011515842960100411,
+ "loss": 0.2481,
+ "step": 3767
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011512110533066132,
+ "loss": 0.2135,
+ "step": 3768
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011508377890437874,
+ "loss": 0.2019,
+ "step": 3769
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011504645032747832,
+ "loss": 0.2537,
+ "step": 3770
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011500911960528229,
+ "loss": 0.2131,
+ "step": 3771
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011497178674311317,
+ "loss": 0.2421,
+ "step": 3772
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011493445174629386,
+ "loss": 0.2012,
+ "step": 3773
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011489711462014751,
+ "loss": 0.2144,
+ "step": 3774
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011485977536999757,
+ "loss": 0.2411,
+ "step": 3775
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011482243400116779,
+ "loss": 0.192,
+ "step": 3776
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011478509051898225,
+ "loss": 0.2245,
+ "step": 3777
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011474774492876532,
+ "loss": 0.241,
+ "step": 3778
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011471039723584162,
+ "loss": 0.2172,
+ "step": 3779
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011467304744553618,
+ "loss": 0.2308,
+ "step": 3780
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011463569556317424,
+ "loss": 0.2523,
+ "step": 3781
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011459834159408137,
+ "loss": 0.216,
+ "step": 3782
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011456098554358342,
+ "loss": 0.2098,
+ "step": 3783
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011452362741700655,
+ "loss": 0.2101,
+ "step": 3784
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011448626721967717,
+ "loss": 0.3598,
+ "step": 3785
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011444890495692213,
+ "loss": 0.2131,
+ "step": 3786
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011441154063406841,
+ "loss": 0.3067,
+ "step": 3787
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011437417425644337,
+ "loss": 0.2866,
+ "step": 3788
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011433680582937461,
+ "loss": 0.2688,
+ "step": 3789
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011429943535819005,
+ "loss": 0.2286,
+ "step": 3790
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011426206284821792,
+ "loss": 0.215,
+ "step": 3791
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011422468830478679,
+ "loss": 0.2293,
+ "step": 3792
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011418731173322532,
+ "loss": 0.2614,
+ "step": 3793
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011414993313886272,
+ "loss": 0.2223,
+ "step": 3794
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011411255252702829,
+ "loss": 0.2415,
+ "step": 3795
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011407516990305169,
+ "loss": 0.2429,
+ "step": 3796
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.0001140377852722629,
+ "loss": 0.2862,
+ "step": 3797
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011400039863999214,
+ "loss": 0.2399,
+ "step": 3798
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011396301001156992,
+ "loss": 0.915,
+ "step": 3799
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011392561939232706,
+ "loss": 0.2398,
+ "step": 3800
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011388822678759464,
+ "loss": 0.2817,
+ "step": 3801
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011385083220270401,
+ "loss": 0.2224,
+ "step": 3802
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011381343564298683,
+ "loss": 0.2319,
+ "step": 3803
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011377603711377504,
+ "loss": 0.2269,
+ "step": 3804
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011373863662040087,
+ "loss": 0.2552,
+ "step": 3805
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011370123416819682,
+ "loss": 0.2335,
+ "step": 3806
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011366382976249564,
+ "loss": 0.2197,
+ "step": 3807
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011362642340863034,
+ "loss": 0.2433,
+ "step": 3808
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011358901511193431,
+ "loss": 0.2135,
+ "step": 3809
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.0001135516048777412,
+ "loss": 0.2488,
+ "step": 3810
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.0001135141927113848,
+ "loss": 0.2426,
+ "step": 3811
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.0001134767786181993,
+ "loss": 0.247,
+ "step": 3812
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011343936260351913,
+ "loss": 0.2235,
+ "step": 3813
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011340194467267901,
+ "loss": 0.2109,
+ "step": 3814
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011336452483101394,
+ "loss": 0.2545,
+ "step": 3815
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011332710308385914,
+ "loss": 0.2104,
+ "step": 3816
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011328967943655016,
+ "loss": 0.2089,
+ "step": 3817
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011325225389442277,
+ "loss": 0.2658,
+ "step": 3818
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011321482646281301,
+ "loss": 0.2736,
+ "step": 3819
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011317739714705731,
+ "loss": 0.2562,
+ "step": 3820
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011313996595249219,
+ "loss": 0.2223,
+ "step": 3821
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011310253288445456,
+ "loss": 0.2212,
+ "step": 3822
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011306509794828153,
+ "loss": 0.2217,
+ "step": 3823
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011302766114931054,
+ "loss": 0.2321,
+ "step": 3824
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011299022249287922,
+ "loss": 0.2423,
+ "step": 3825
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011295278198432557,
+ "loss": 0.2651,
+ "step": 3826
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.0001129153396289877,
+ "loss": 0.2256,
+ "step": 3827
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011287789543220417,
+ "loss": 0.2656,
+ "step": 3828
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011284044939931364,
+ "loss": 0.2332,
+ "step": 3829
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.0001128030015356551,
+ "loss": 0.2121,
+ "step": 3830
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011276555184656783,
+ "loss": 0.2148,
+ "step": 3831
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011272810033739135,
+ "loss": 0.234,
+ "step": 3832
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011269064701346534,
+ "loss": 0.2466,
+ "step": 3833
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011265319188012994,
+ "loss": 0.2008,
+ "step": 3834
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011261573494272538,
+ "loss": 0.1905,
+ "step": 3835
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011257827620659216,
+ "loss": 0.2515,
+ "step": 3836
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011254081567707115,
+ "loss": 0.2579,
+ "step": 3837
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011250335335950342,
+ "loss": 0.2598,
+ "step": 3838
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011246588925923018,
+ "loss": 0.2399,
+ "step": 3839
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011242842338159309,
+ "loss": 0.2181,
+ "step": 3840
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.0001123909557319339,
+ "loss": 0.2744,
+ "step": 3841
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011235348631559473,
+ "loss": 0.2149,
+ "step": 3842
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011231601513791786,
+ "loss": 0.2184,
+ "step": 3843
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.0001122785422042459,
+ "loss": 0.2098,
+ "step": 3844
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011224106751992163,
+ "loss": 0.2277,
+ "step": 3845
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011220359109028815,
+ "loss": 0.2571,
+ "step": 3846
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011216611292068881,
+ "loss": 0.2087,
+ "step": 3847
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.0001121286330164671,
+ "loss": 0.2497,
+ "step": 3848
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011209115138296693,
+ "loss": 0.1869,
+ "step": 3849
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.0001120536680255323,
+ "loss": 0.239,
+ "step": 3850
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011201618294950756,
+ "loss": 0.2018,
+ "step": 3851
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011197869616023722,
+ "loss": 0.2751,
+ "step": 3852
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011194120766306611,
+ "loss": 0.2526,
+ "step": 3853
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011190371746333923,
+ "loss": 0.2657,
+ "step": 3854
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011186622556640194,
+ "loss": 0.2659,
+ "step": 3855
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011182873197759971,
+ "loss": 0.2401,
+ "step": 3856
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011179123670227833,
+ "loss": 0.2299,
+ "step": 3857
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011175373974578378,
+ "loss": 0.2249,
+ "step": 3858
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011171624111346232,
+ "loss": 0.2457,
+ "step": 3859
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011167874081066045,
+ "loss": 0.192,
+ "step": 3860
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011164123884272493,
+ "loss": 0.2591,
+ "step": 3861
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011160373521500264,
+ "loss": 0.2632,
+ "step": 3862
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011156622993284084,
+ "loss": 0.248,
+ "step": 3863
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011152872300158694,
+ "loss": 0.2071,
+ "step": 3864
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011149121442658861,
+ "loss": 0.2935,
+ "step": 3865
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011145370421319377,
+ "loss": 0.2191,
+ "step": 3866
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011141619236675056,
+ "loss": 0.2737,
+ "step": 3867
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011137867889260734,
+ "loss": 0.2281,
+ "step": 3868
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011134116379611273,
+ "loss": 0.2083,
+ "step": 3869
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011130364708261552,
+ "loss": 0.2079,
+ "step": 3870
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011126612875746479,
+ "loss": 0.2423,
+ "step": 3871
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011122860882600986,
+ "loss": 0.1903,
+ "step": 3872
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011119108729360026,
+ "loss": 0.1995,
+ "step": 3873
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.0001111535641655857,
+ "loss": 0.2479,
+ "step": 3874
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011111603944731623,
+ "loss": 0.198,
+ "step": 3875
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011107851314414197,
+ "loss": 0.2242,
+ "step": 3876
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.0001110409852614134,
+ "loss": 0.29,
+ "step": 3877
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011100345580448118,
+ "loss": 0.1931,
+ "step": 3878
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011096592477869616,
+ "loss": 0.2195,
+ "step": 3879
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.0001109283921894095,
+ "loss": 0.2383,
+ "step": 3880
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011089085804197248,
+ "loss": 0.2729,
+ "step": 3881
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011085332234173664,
+ "loss": 0.1836,
+ "step": 3882
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011081578509405382,
+ "loss": 0.2724,
+ "step": 3883
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011077824630427594,
+ "loss": 0.2027,
+ "step": 3884
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011074070597775527,
+ "loss": 0.2681,
+ "step": 3885
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011070316411984421,
+ "loss": 0.205,
+ "step": 3886
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.0001106656207358954,
+ "loss": 0.3106,
+ "step": 3887
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011062807583126172,
+ "loss": 0.2126,
+ "step": 3888
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011059052941129628,
+ "loss": 0.4017,
+ "step": 3889
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011055298148135236,
+ "loss": 0.2406,
+ "step": 3890
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011051543204678348,
+ "loss": 0.2833,
+ "step": 3891
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011047788111294333,
+ "loss": 0.2224,
+ "step": 3892
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.0001104403286851859,
+ "loss": 0.3536,
+ "step": 3893
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011040277476886533,
+ "loss": 0.2373,
+ "step": 3894
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011036521936933604,
+ "loss": 0.2297,
+ "step": 3895
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011032766249195252,
+ "loss": 0.1979,
+ "step": 3896
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011029010414206965,
+ "loss": 0.2434,
+ "step": 3897
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011025254432504233,
+ "loss": 0.2897,
+ "step": 3898
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011021498304622586,
+ "loss": 0.2121,
+ "step": 3899
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011017742031097563,
+ "loss": 0.3021,
+ "step": 3900
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00011013985612464726,
+ "loss": 0.2463,
+ "step": 3901
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.0001101022904925966,
+ "loss": 0.3078,
+ "step": 3902
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00011006472342017966,
+ "loss": 0.3664,
+ "step": 3903
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.0001100271549127527,
+ "loss": 0.2176,
+ "step": 3904
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.0001099895849756722,
+ "loss": 0.2137,
+ "step": 3905
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00010995201361429474,
+ "loss": 0.2588,
+ "step": 3906
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00010991444083397728,
+ "loss": 0.2686,
+ "step": 3907
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00010987686664007679,
+ "loss": 0.2235,
+ "step": 3908
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010983929103795059,
+ "loss": 0.2602,
+ "step": 3909
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.0001098017140329561,
+ "loss": 0.1857,
+ "step": 3910
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010976413563045094,
+ "loss": 0.2307,
+ "step": 3911
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010972655583579308,
+ "loss": 0.2658,
+ "step": 3912
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010968897465434051,
+ "loss": 0.2106,
+ "step": 3913
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010965139209145152,
+ "loss": 0.2122,
+ "step": 3914
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010961380815248454,
+ "loss": 0.2433,
+ "step": 3915
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.0001095762228427982,
+ "loss": 0.2032,
+ "step": 3916
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010953863616775138,
+ "loss": 0.3393,
+ "step": 3917
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010950104813270314,
+ "loss": 0.2476,
+ "step": 3918
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010946345874301264,
+ "loss": 0.1929,
+ "step": 3919
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.0001094258680040394,
+ "loss": 0.2509,
+ "step": 3920
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010938827592114294,
+ "loss": 0.2103,
+ "step": 3921
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010935068249968314,
+ "loss": 0.2297,
+ "step": 3922
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010931308774501998,
+ "loss": 0.2259,
+ "step": 3923
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010927549166251368,
+ "loss": 0.238,
+ "step": 3924
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010923789425752456,
+ "loss": 0.3147,
+ "step": 3925
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010920029553541326,
+ "loss": 0.2753,
+ "step": 3926
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010916269550154048,
+ "loss": 0.2399,
+ "step": 3927
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.0001091250941612672,
+ "loss": 0.2196,
+ "step": 3928
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010908749151995452,
+ "loss": 0.2326,
+ "step": 3929
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.0001090498875829638,
+ "loss": 0.2217,
+ "step": 3930
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010901228235565651,
+ "loss": 0.2012,
+ "step": 3931
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010897467584339434,
+ "loss": 0.2018,
+ "step": 3932
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010893706805153915,
+ "loss": 0.2382,
+ "step": 3933
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.000108899458985453,
+ "loss": 0.2202,
+ "step": 3934
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010886184865049813,
+ "loss": 0.2038,
+ "step": 3935
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010882423705203698,
+ "loss": 0.2406,
+ "step": 3936
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010878662419543206,
+ "loss": 0.2393,
+ "step": 3937
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010874901008604623,
+ "loss": 0.2626,
+ "step": 3938
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010871139472924237,
+ "loss": 0.246,
+ "step": 3939
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010867377813038366,
+ "loss": 0.2228,
+ "step": 3940
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010863616029483339,
+ "loss": 0.2091,
+ "step": 3941
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010859854122795508,
+ "loss": 0.215,
+ "step": 3942
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.0001085609209351123,
+ "loss": 0.2071,
+ "step": 3943
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010852329942166894,
+ "loss": 0.2208,
+ "step": 3944
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010848567669298901,
+ "loss": 0.1988,
+ "step": 3945
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010844805275443673,
+ "loss": 0.2129,
+ "step": 3946
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010841042761137634,
+ "loss": 0.3038,
+ "step": 3947
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010837280126917248,
+ "loss": 0.206,
+ "step": 3948
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010833517373318975,
+ "loss": 0.2648,
+ "step": 3949
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010829754500879308,
+ "loss": 0.2136,
+ "step": 3950
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.0001082599151013475,
+ "loss": 0.3746,
+ "step": 3951
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010822228401621819,
+ "loss": 0.2403,
+ "step": 3952
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010818465175877052,
+ "loss": 0.2288,
+ "step": 3953
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.0001081470183343701,
+ "loss": 0.2099,
+ "step": 3954
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010810938374838251,
+ "loss": 0.1992,
+ "step": 3955
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.0001080717480061737,
+ "loss": 0.2337,
+ "step": 3956
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010803411111310971,
+ "loss": 0.2127,
+ "step": 3957
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010799647307455674,
+ "loss": 0.2936,
+ "step": 3958
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010795883389588111,
+ "loss": 0.3019,
+ "step": 3959
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010792119358244939,
+ "loss": 0.2262,
+ "step": 3960
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010788355213962825,
+ "loss": 0.2561,
+ "step": 3961
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.0001078459095727845,
+ "loss": 0.2992,
+ "step": 3962
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.0001078082658872852,
+ "loss": 0.2081,
+ "step": 3963
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010777062108849756,
+ "loss": 0.2089,
+ "step": 3964
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010773297518178881,
+ "loss": 0.2112,
+ "step": 3965
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010769532817252653,
+ "loss": 0.1898,
+ "step": 3966
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010765768006607826,
+ "loss": 0.3229,
+ "step": 3967
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010762003086781185,
+ "loss": 0.2241,
+ "step": 3968
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010758238058309527,
+ "loss": 0.2814,
+ "step": 3969
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010754472921729661,
+ "loss": 0.2403,
+ "step": 3970
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010750707677578413,
+ "loss": 0.2715,
+ "step": 3971
+ },
+ {
+ "epoch": 4.71,
+ "eval_loss": 3.3954412937164307,
+ "eval_runtime": 283.9122,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 3971
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010746942326392628,
+ "loss": 0.2263,
+ "step": 3972
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010743176868709157,
+ "loss": 0.2433,
+ "step": 3973
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.0001073941130506488,
+ "loss": 0.2871,
+ "step": 3974
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010735645635996676,
+ "loss": 0.2416,
+ "step": 3975
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.0001073187986204145,
+ "loss": 0.2563,
+ "step": 3976
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010728113983736126,
+ "loss": 0.2502,
+ "step": 3977
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010724348001617625,
+ "loss": 0.2145,
+ "step": 3978
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.000107205819162229,
+ "loss": 0.2639,
+ "step": 3979
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010716815728088912,
+ "loss": 0.2279,
+ "step": 3980
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.0001071304943775264,
+ "loss": 0.2086,
+ "step": 3981
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010709283045751069,
+ "loss": 0.2142,
+ "step": 3982
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.0001070551655262121,
+ "loss": 0.2381,
+ "step": 3983
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010701749958900078,
+ "loss": 0.2313,
+ "step": 3984
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.0001069798326512471,
+ "loss": 0.1954,
+ "step": 3985
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010694216471832152,
+ "loss": 0.2253,
+ "step": 3986
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010690449579559469,
+ "loss": 0.2104,
+ "step": 3987
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010686682588843737,
+ "loss": 0.2172,
+ "step": 3988
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010682915500222051,
+ "loss": 0.2094,
+ "step": 3989
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010679148314231504,
+ "loss": 0.2885,
+ "step": 3990
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010675381031409225,
+ "loss": 0.3085,
+ "step": 3991
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010671613652292343,
+ "loss": 0.2515,
+ "step": 3992
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010667846177418003,
+ "loss": 0.2314,
+ "step": 3993
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010664078607323367,
+ "loss": 0.2473,
+ "step": 3994
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010660310942545608,
+ "loss": 0.2283,
+ "step": 3995
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010656543183621912,
+ "loss": 0.226,
+ "step": 3996
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010652775331089477,
+ "loss": 0.2169,
+ "step": 3997
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010649007385485519,
+ "loss": 0.2079,
+ "step": 3998
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010645239347347269,
+ "loss": 0.2437,
+ "step": 3999
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010641471217211958,
+ "loss": 0.2127,
+ "step": 4000
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010637702995616847,
+ "loss": 0.2527,
+ "step": 4001
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010633934683099196,
+ "loss": 0.2193,
+ "step": 4002
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.0001063016628019629,
+ "loss": 0.2744,
+ "step": 4003
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010626397787445416,
+ "loss": 0.2592,
+ "step": 4004
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010622629205383885,
+ "loss": 0.2107,
+ "step": 4005
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010618860534549006,
+ "loss": 0.1956,
+ "step": 4006
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010615091775478117,
+ "loss": 0.2546,
+ "step": 4007
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010611322928708555,
+ "loss": 0.2376,
+ "step": 4008
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010607553994777684,
+ "loss": 0.2359,
+ "step": 4009
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010603784974222861,
+ "loss": 0.2631,
+ "step": 4010
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010600015867581474,
+ "loss": 0.2602,
+ "step": 4011
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010596246675390911,
+ "loss": 0.2043,
+ "step": 4012
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010592477398188575,
+ "loss": 0.2325,
+ "step": 4013
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.0001058870803651189,
+ "loss": 0.2395,
+ "step": 4014
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010584938590898281,
+ "loss": 0.2205,
+ "step": 4015
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010581169061885185,
+ "loss": 0.2169,
+ "step": 4016
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010577399450010062,
+ "loss": 0.1986,
+ "step": 4017
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.0001057362975581037,
+ "loss": 0.2011,
+ "step": 4018
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010569859979823586,
+ "loss": 0.2208,
+ "step": 4019
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.000105660901225872,
+ "loss": 0.2478,
+ "step": 4020
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010562320184638714,
+ "loss": 0.1936,
+ "step": 4021
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010558550166515633,
+ "loss": 0.2719,
+ "step": 4022
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010554780068755483,
+ "loss": 0.2873,
+ "step": 4023
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010551009891895796,
+ "loss": 0.1993,
+ "step": 4024
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010547239636474115,
+ "loss": 0.2174,
+ "step": 4025
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010543469303028002,
+ "loss": 0.2009,
+ "step": 4026
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010539698892095021,
+ "loss": 0.2038,
+ "step": 4027
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.0001053592840421275,
+ "loss": 0.2119,
+ "step": 4028
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010532157839918779,
+ "loss": 0.242,
+ "step": 4029
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010528387199750707,
+ "loss": 0.2026,
+ "step": 4030
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010524616484246146,
+ "loss": 0.2445,
+ "step": 4031
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010520845693942719,
+ "loss": 0.2793,
+ "step": 4032
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010517074829378057,
+ "loss": 0.2658,
+ "step": 4033
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010513303891089803,
+ "loss": 0.2069,
+ "step": 4034
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010509532879615614,
+ "loss": 0.2211,
+ "step": 4035
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010505761795493145,
+ "loss": 0.2078,
+ "step": 4036
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010501990639260079,
+ "loss": 0.2796,
+ "step": 4037
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010498219411454098,
+ "loss": 0.2201,
+ "step": 4038
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.000104944481126129,
+ "loss": 0.198,
+ "step": 4039
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010490676743274181,
+ "loss": 0.2182,
+ "step": 4040
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010486905303975664,
+ "loss": 0.216,
+ "step": 4041
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010483133795255071,
+ "loss": 0.2365,
+ "step": 4042
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010479362217650137,
+ "loss": 0.2472,
+ "step": 4043
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.0001047559057169861,
+ "loss": 0.2259,
+ "step": 4044
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010471818857938238,
+ "loss": 0.2306,
+ "step": 4045
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010468047076906793,
+ "loss": 0.2689,
+ "step": 4046
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.0001046427522914204,
+ "loss": 0.2361,
+ "step": 4047
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010460503315181768,
+ "loss": 0.2919,
+ "step": 4048
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010456731335563769,
+ "loss": 0.2397,
+ "step": 4049
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010452959290825846,
+ "loss": 0.2144,
+ "step": 4050
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010449187181505804,
+ "loss": 0.258,
+ "step": 4051
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010445415008141473,
+ "loss": 0.2199,
+ "step": 4052
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010441642771270675,
+ "loss": 0.1817,
+ "step": 4053
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010437870471431251,
+ "loss": 0.2089,
+ "step": 4054
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010434098109161051,
+ "loss": 0.2047,
+ "step": 4055
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010430325684997928,
+ "loss": 0.2067,
+ "step": 4056
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010426553199479749,
+ "loss": 0.1996,
+ "step": 4057
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.0001042278065314439,
+ "loss": 0.2205,
+ "step": 4058
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.0001041900804652973,
+ "loss": 0.2508,
+ "step": 4059
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010415235380173662,
+ "loss": 0.2562,
+ "step": 4060
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010411462654614088,
+ "loss": 0.2199,
+ "step": 4061
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010407689870388916,
+ "loss": 0.2718,
+ "step": 4062
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010403917028036058,
+ "loss": 0.2292,
+ "step": 4063
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010400144128093448,
+ "loss": 0.3123,
+ "step": 4064
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010396371171099006,
+ "loss": 0.2814,
+ "step": 4065
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010392598157590688,
+ "loss": 0.231,
+ "step": 4066
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010388825088106433,
+ "loss": 0.2242,
+ "step": 4067
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010385051963184202,
+ "loss": 0.1998,
+ "step": 4068
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.0001038127878336196,
+ "loss": 0.1902,
+ "step": 4069
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010377505549177682,
+ "loss": 0.2198,
+ "step": 4070
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010373732261169346,
+ "loss": 0.2537,
+ "step": 4071
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010369958919874943,
+ "loss": 0.2267,
+ "step": 4072
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010366185525832467,
+ "loss": 0.2376,
+ "step": 4073
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010362412079579924,
+ "loss": 0.2076,
+ "step": 4074
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010358638581655322,
+ "loss": 0.2507,
+ "step": 4075
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010354865032596682,
+ "loss": 0.2077,
+ "step": 4076
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010351091432942029,
+ "loss": 0.2762,
+ "step": 4077
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010347317783229398,
+ "loss": 0.2232,
+ "step": 4078
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010343544083996824,
+ "loss": 0.2475,
+ "step": 4079
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010339770335782359,
+ "loss": 0.2108,
+ "step": 4080
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010335996539124055,
+ "loss": 0.2544,
+ "step": 4081
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010332222694559975,
+ "loss": 0.2253,
+ "step": 4082
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010328448802628183,
+ "loss": 0.2324,
+ "step": 4083
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010324674863866759,
+ "loss": 0.287,
+ "step": 4084
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.0001032090087881378,
+ "loss": 0.3515,
+ "step": 4085
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010317126848007337,
+ "loss": 0.2242,
+ "step": 4086
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.0001031335277198552,
+ "loss": 0.2242,
+ "step": 4087
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010309578651286436,
+ "loss": 0.1879,
+ "step": 4088
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010305804486448186,
+ "loss": 0.2261,
+ "step": 4089
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.0001030203027800889,
+ "loss": 0.2415,
+ "step": 4090
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010298256026506662,
+ "loss": 0.2141,
+ "step": 4091
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010294481732479635,
+ "loss": 0.2015,
+ "step": 4092
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.0001029070739646593,
+ "loss": 0.206,
+ "step": 4093
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010286933019003697,
+ "loss": 0.2598,
+ "step": 4094
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010283158600631072,
+ "loss": 0.2561,
+ "step": 4095
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010279384141886208,
+ "loss": 0.1914,
+ "step": 4096
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010275609643307258,
+ "loss": 0.2416,
+ "step": 4097
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010271835105432388,
+ "loss": 0.2012,
+ "step": 4098
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010268060528799754,
+ "loss": 0.3043,
+ "step": 4099
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010264285913947545,
+ "loss": 0.2331,
+ "step": 4100
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010260511261413923,
+ "loss": 0.237,
+ "step": 4101
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010256736571737083,
+ "loss": 0.2776,
+ "step": 4102
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010252961845455205,
+ "loss": 0.1938,
+ "step": 4103
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010249187083106486,
+ "loss": 0.2596,
+ "step": 4104
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010245412285229124,
+ "loss": 0.196,
+ "step": 4105
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010241637452361323,
+ "loss": 0.2369,
+ "step": 4106
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010237862585041293,
+ "loss": 0.2091,
+ "step": 4107
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010234087683807247,
+ "loss": 0.2273,
+ "step": 4108
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010230312749197406,
+ "loss": 0.3996,
+ "step": 4109
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010226537781749987,
+ "loss": 0.2382,
+ "step": 4110
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010222762782003223,
+ "loss": 0.2174,
+ "step": 4111
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010218987750495343,
+ "loss": 0.2569,
+ "step": 4112
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010215212687764593,
+ "loss": 0.2239,
+ "step": 4113
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010211437594349203,
+ "loss": 0.3192,
+ "step": 4114
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010207662470787427,
+ "loss": 0.2347,
+ "step": 4115
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010203887317617511,
+ "loss": 0.2461,
+ "step": 4116
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010200112135377709,
+ "loss": 0.2826,
+ "step": 4117
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010196336924606283,
+ "loss": 0.3531,
+ "step": 4118
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010192561685841496,
+ "loss": 0.2104,
+ "step": 4119
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010188786419621612,
+ "loss": 0.2257,
+ "step": 4120
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010185011126484903,
+ "loss": 0.2096,
+ "step": 4121
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.0001018123580696964,
+ "loss": 0.2009,
+ "step": 4122
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010177460461614108,
+ "loss": 0.3198,
+ "step": 4123
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010173685090956582,
+ "loss": 0.1979,
+ "step": 4124
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010169909695535354,
+ "loss": 0.2507,
+ "step": 4125
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010166134275888708,
+ "loss": 0.2295,
+ "step": 4126
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010162358832554937,
+ "loss": 0.2355,
+ "step": 4127
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010158583366072338,
+ "loss": 0.2253,
+ "step": 4128
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010154807876979213,
+ "loss": 0.3306,
+ "step": 4129
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010151032365813859,
+ "loss": 0.2265,
+ "step": 4130
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010147256833114586,
+ "loss": 0.2176,
+ "step": 4131
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.000101434812794197,
+ "loss": 0.2631,
+ "step": 4132
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010139705705267513,
+ "loss": 0.2241,
+ "step": 4133
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010135930111196338,
+ "loss": 0.2317,
+ "step": 4134
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.000101321544977445,
+ "loss": 0.2325,
+ "step": 4135
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010128378865450307,
+ "loss": 0.2011,
+ "step": 4136
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010124603214852093,
+ "loss": 0.232,
+ "step": 4137
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010120827546488174,
+ "loss": 0.2624,
+ "step": 4138
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010117051860896885,
+ "loss": 0.2452,
+ "step": 4139
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010113276158616553,
+ "loss": 0.2261,
+ "step": 4140
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010109500440185514,
+ "loss": 0.2378,
+ "step": 4141
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.000101057247061421,
+ "loss": 0.2172,
+ "step": 4142
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010101948957024647,
+ "loss": 0.2539,
+ "step": 4143
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010098173193371499,
+ "loss": 0.2178,
+ "step": 4144
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010094397415720991,
+ "loss": 0.2545,
+ "step": 4145
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010090621624611474,
+ "loss": 0.2233,
+ "step": 4146
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.0001008684582058129,
+ "loss": 0.2547,
+ "step": 4147
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010083070004168786,
+ "loss": 0.232,
+ "step": 4148
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010079294175912313,
+ "loss": 0.313,
+ "step": 4149
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010075518336350218,
+ "loss": 0.2234,
+ "step": 4150
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010071742486020854,
+ "loss": 0.2447,
+ "step": 4151
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010067966625462577,
+ "loss": 0.246,
+ "step": 4152
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010064190755213745,
+ "loss": 0.1836,
+ "step": 4153
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010060414875812709,
+ "loss": 0.2655,
+ "step": 4154
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010056638987797833,
+ "loss": 0.2338,
+ "step": 4155
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010052863091707467,
+ "loss": 0.2014,
+ "step": 4156
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010049087188079983,
+ "loss": 0.2492,
+ "step": 4157
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.0001004531127745373,
+ "loss": 0.2547,
+ "step": 4158
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010041535360367085,
+ "loss": 0.2837,
+ "step": 4159
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010037759437358398,
+ "loss": 0.2598,
+ "step": 4160
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.0001003398350896604,
+ "loss": 0.2047,
+ "step": 4161
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010030207575728374,
+ "loss": 0.2006,
+ "step": 4162
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010026431638183771,
+ "loss": 0.2399,
+ "step": 4163
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010022655696870588,
+ "loss": 0.2508,
+ "step": 4164
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010018879752327202,
+ "loss": 0.2217,
+ "step": 4165
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 8330,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 833,
+ "total_flos": 1.4598637075499581e+19,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-4165/trainer_state.json:com.dropbox.attrs b/checkpoint-4165/trainer_state.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..32786d8313a4d06b0c34c1121e3039aa8c883306
Binary files /dev/null and b/checkpoint-4165/trainer_state.json:com.dropbox.attrs differ
diff --git a/checkpoint-4165/training_args.bin b/checkpoint-4165/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207
--- /dev/null
+++ b/checkpoint-4165/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869
+size 4859
diff --git a/checkpoint-4165/training_args.bin:com.dropbox.attrs b/checkpoint-4165/training_args.bin:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..a91b0f6ae12e6c785b4300279c92a84d88fcd8ef
Binary files /dev/null and b/checkpoint-4165/training_args.bin:com.dropbox.attrs differ
diff --git a/checkpoint-4998/README.md b/checkpoint-4998/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda
--- /dev/null
+++ b/checkpoint-4998/README.md
@@ -0,0 +1,218 @@
+---
+library_name: peft
+base_model: mistralai/Mixtral-8x7B-v0.1
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+
+
+## Training procedure
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+### Framework versions
+
+- PEFT 0.7.0
\ No newline at end of file
diff --git a/checkpoint-4998/README.md:com.dropbox.attrs b/checkpoint-4998/README.md:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..27def3893eec6cec258b41a0dceb195b8f9a217c
Binary files /dev/null and b/checkpoint-4998/README.md:com.dropbox.attrs differ
diff --git a/checkpoint-4998/adapter_config.json b/checkpoint-4998/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a
--- /dev/null
+++ b/checkpoint-4998/adapter_config.json
@@ -0,0 +1,32 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "k_proj",
+ "w1",
+ "gate",
+ "w2",
+ "q_proj",
+ "w3",
+ "o_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-4998/adapter_config.json:com.dropbox.attrs b/checkpoint-4998/adapter_config.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..4493853892c5a03db5492e4e8cd18957f5fbe36b
Binary files /dev/null and b/checkpoint-4998/adapter_config.json:com.dropbox.attrs differ
diff --git a/checkpoint-4998/adapter_model.safetensors b/checkpoint-4998/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..77acba980210f7b490312f30eb63496d0e4de3d1
--- /dev/null
+++ b/checkpoint-4998/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:909de69236e61c484cbf6caa4ba219b395bc1dc00a842d28a4b0f9b60da0ea89
+size 3875879784
diff --git a/checkpoint-4998/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-4998/adapter_model.safetensors:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..1518b19b8f13479185c729190828ae3f9a1f07f5
Binary files /dev/null and b/checkpoint-4998/adapter_model.safetensors:com.dropbox.attrs differ
diff --git a/checkpoint-4998/optimizer.pt b/checkpoint-4998/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c00f5f315153ff5085f45b6821d180efcb2d1a2
--- /dev/null
+++ b/checkpoint-4998/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9f3156d0bd06702e3c7446fc2a1546a9e5281333f6573eeddf03d18286b1efe
+size 1943844127
diff --git a/checkpoint-4998/optimizer.pt:com.dropbox.attrs b/checkpoint-4998/optimizer.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..0a2aec84a95eef460302e3406a3f5aa188e99d8a
Binary files /dev/null and b/checkpoint-4998/optimizer.pt:com.dropbox.attrs differ
diff --git a/checkpoint-4998/rng_state.pth b/checkpoint-4998/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bc4eaae57c8679ef7d42f823db407c87f09332ff
--- /dev/null
+++ b/checkpoint-4998/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e45216804b71992ea0913e7ffea6dccebecd90e7c55671ba4e91a8c3b8b8ad5f
+size 14575
diff --git a/checkpoint-4998/rng_state.pth:com.dropbox.attrs b/checkpoint-4998/rng_state.pth:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..e9a3653fbb8cc62699d9dd3313b5032c23fe173a
Binary files /dev/null and b/checkpoint-4998/rng_state.pth:com.dropbox.attrs differ
diff --git a/checkpoint-4998/scheduler.pt b/checkpoint-4998/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f65da8843bdc761fdb378c9899aa3b235d782366
--- /dev/null
+++ b/checkpoint-4998/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3918a3cb0b71ac100385ce1eef20ea8ad28fb8150d1e4e96f5c061a6884160ae
+size 627
diff --git a/checkpoint-4998/scheduler.pt:com.dropbox.attrs b/checkpoint-4998/scheduler.pt:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..b2178948b50537034baa78a63d66d299e08accb8
Binary files /dev/null and b/checkpoint-4998/scheduler.pt:com.dropbox.attrs differ
diff --git a/checkpoint-4998/trainer_state.json b/checkpoint-4998/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..062b17aa12e04ff2b22b5ad5a7144331236505d0
--- /dev/null
+++ b/checkpoint-4998/trainer_state.json
@@ -0,0 +1,30201 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 5.925570228091236,
+ "eval_steps": 209,
+ "global_step": 4998,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": 2e-05,
+ "loss": 2.1426,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "eval_loss": 2.071432113647461,
+ "eval_runtime": 279.6718,
+ "eval_samples_per_second": 0.737,
+ "eval_steps_per_second": 0.737,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4e-05,
+ "loss": 2.4033,
+ "step": 2
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 6e-05,
+ "loss": 2.1893,
+ "step": 3
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 8e-05,
+ "loss": 2.3226,
+ "step": 4
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0001,
+ "loss": 2.2485,
+ "step": 5
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00012,
+ "loss": 1.9704,
+ "step": 6
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00014,
+ "loss": 1.6929,
+ "step": 7
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00016,
+ "loss": 2.2957,
+ "step": 8
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00018,
+ "loss": 1.9907,
+ "step": 9
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.0002,
+ "loss": 2.1295,
+ "step": 10
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999999287109068,
+ "loss": 2.2249,
+ "step": 11
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 0.00019999997148436365,
+ "loss": 2.1733,
+ "step": 12
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.000199999935839822,
+ "loss": 2.1404,
+ "step": 13
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999988593747084,
+ "loss": 2.0236,
+ "step": 14
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999982177731722,
+ "loss": 1.9639,
+ "step": 15
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999974335937034,
+ "loss": 1.692,
+ "step": 16
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999965068364137,
+ "loss": 2.3609,
+ "step": 17
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999954375014348,
+ "loss": 2.3553,
+ "step": 18
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999942255889198,
+ "loss": 1.5733,
+ "step": 19
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019999928710990412,
+ "loss": 1.7505,
+ "step": 20
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999913740319922,
+ "loss": 2.3068,
+ "step": 21
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999897343879862,
+ "loss": 1.8371,
+ "step": 22
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.0001999987952167257,
+ "loss": 1.9852,
+ "step": 23
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999860273700585,
+ "loss": 1.9625,
+ "step": 24
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999839599966655,
+ "loss": 2.1089,
+ "step": 25
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999817500473724,
+ "loss": 2.1086,
+ "step": 26
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999793975224945,
+ "loss": 2.0284,
+ "step": 27
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999769024223673,
+ "loss": 2.3641,
+ "step": 28
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00019999742647473464,
+ "loss": 1.963,
+ "step": 29
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999714844978078,
+ "loss": 2.0635,
+ "step": 30
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999968561674148,
+ "loss": 1.9304,
+ "step": 31
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999654962767839,
+ "loss": 1.4124,
+ "step": 32
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999622883061518,
+ "loss": 2.1444,
+ "step": 33
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999589377627102,
+ "loss": 1.6477,
+ "step": 34
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999955444646936,
+ "loss": 2.2601,
+ "step": 35
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.00019999518089593282,
+ "loss": 1.6256,
+ "step": 36
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 0.0001999948030700404,
+ "loss": 1.9155,
+ "step": 37
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999441098707025,
+ "loss": 2.1408,
+ "step": 38
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999400464707832,
+ "loss": 2.104,
+ "step": 39
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.0001999935840501225,
+ "loss": 1.9841,
+ "step": 40
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999314919626272,
+ "loss": 1.5924,
+ "step": 41
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999270008556108,
+ "loss": 1.9956,
+ "step": 42
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999223671808154,
+ "loss": 1.4673,
+ "step": 43
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999175909389018,
+ "loss": 2.1595,
+ "step": 44
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00019999126721305513,
+ "loss": 1.8439,
+ "step": 45
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999076107564648,
+ "loss": 1.9961,
+ "step": 46
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019999024068173638,
+ "loss": 2.1504,
+ "step": 47
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998970603139912,
+ "loss": 2.2907,
+ "step": 48
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999891571247108,
+ "loss": 1.5709,
+ "step": 49
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999885939617498,
+ "loss": 2.4504,
+ "step": 50
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998801654259632,
+ "loss": 2.3787,
+ "step": 51
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999874248673328,
+ "loss": 2.0434,
+ "step": 52
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00019998681893604347,
+ "loss": 2.1671,
+ "step": 53
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.0001999861987488148,
+ "loss": 1.7432,
+ "step": 54
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998556430573521,
+ "loss": 1.7737,
+ "step": 55
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998491560689513,
+ "loss": 2.0122,
+ "step": 56
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999842526523871,
+ "loss": 1.7545,
+ "step": 57
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998357544230558,
+ "loss": 2.201,
+ "step": 58
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998288397674716,
+ "loss": 2.0396,
+ "step": 59
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999821782558104,
+ "loss": 1.9275,
+ "step": 60
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.00019998145827959598,
+ "loss": 1.7797,
+ "step": 61
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 0.0001999807240482065,
+ "loss": 2.1463,
+ "step": 62
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997997556174665,
+ "loss": 1.935,
+ "step": 63
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999792128203232,
+ "loss": 2.1182,
+ "step": 64
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999784358240448,
+ "loss": 2.2297,
+ "step": 65
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997764457302234,
+ "loss": 2.1052,
+ "step": 66
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999768390673686,
+ "loss": 2.0777,
+ "step": 67
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997601930719835,
+ "loss": 2.1419,
+ "step": 68
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.0001999751852926286,
+ "loss": 2.2586,
+ "step": 69
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00019997433702377817,
+ "loss": 1.9089,
+ "step": 70
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997347450076801,
+ "loss": 2.0587,
+ "step": 71
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997259772372116,
+ "loss": 2.4143,
+ "step": 72
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997170669276256,
+ "loss": 1.947,
+ "step": 73
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019997080140801932,
+ "loss": 2.008,
+ "step": 74
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996988186962041,
+ "loss": 2.4912,
+ "step": 75
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996894807769707,
+ "loss": 2.0279,
+ "step": 76
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996800003238232,
+ "loss": 1.9914,
+ "step": 77
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.0001999670377338114,
+ "loss": 1.9091,
+ "step": 78
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00019996606118212148,
+ "loss": 1.8038,
+ "step": 79
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996507037745183,
+ "loss": 2.3573,
+ "step": 80
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996406531994364,
+ "loss": 2.3204,
+ "step": 81
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999630460097403,
+ "loss": 2.1619,
+ "step": 82
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.0001999620124469871,
+ "loss": 1.9977,
+ "step": 83
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019996096463183142,
+ "loss": 2.195,
+ "step": 84
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995990256442263,
+ "loss": 1.9909,
+ "step": 85
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995882624491217,
+ "loss": 2.2001,
+ "step": 86
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 0.00019995773567345354,
+ "loss": 1.5795,
+ "step": 87
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995663085020212,
+ "loss": 2.174,
+ "step": 88
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995551177531557,
+ "loss": 1.9605,
+ "step": 89
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995437844895334,
+ "loss": 2.1768,
+ "step": 90
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999532308712771,
+ "loss": 1.6906,
+ "step": 91
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995206904245037,
+ "loss": 2.1029,
+ "step": 92
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019995089296263893,
+ "loss": 2.0652,
+ "step": 93
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00019994970263201035,
+ "loss": 2.1733,
+ "step": 94
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.0001999484980507344,
+ "loss": 1.9413,
+ "step": 95
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999472792189828,
+ "loss": 1.9538,
+ "step": 96
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994604613692935,
+ "loss": 2.4158,
+ "step": 97
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994479880474988,
+ "loss": 1.8964,
+ "step": 98
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999435372226222,
+ "loss": 2.3135,
+ "step": 99
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.0001999422613907262,
+ "loss": 2.127,
+ "step": 100
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019994097130924374,
+ "loss": 1.9954,
+ "step": 101
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993966697835883,
+ "loss": 2.1363,
+ "step": 102
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993834839825738,
+ "loss": 1.7779,
+ "step": 103
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00019993701556912742,
+ "loss": 2.0923,
+ "step": 104
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993566849115898,
+ "loss": 1.9183,
+ "step": 105
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993430716454413,
+ "loss": 1.7894,
+ "step": 106
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019993293158947694,
+ "loss": 2.0094,
+ "step": 107
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999315417661536,
+ "loss": 2.1469,
+ "step": 108
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999301376947722,
+ "loss": 1.6924,
+ "step": 109
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.0001999287193755329,
+ "loss": 2.1794,
+ "step": 110
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.000199927286808638,
+ "loss": 2.1338,
+ "step": 111
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 0.00019992583999429178,
+ "loss": 1.9988,
+ "step": 112
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999243789327004,
+ "loss": 2.0735,
+ "step": 113
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999229036240723,
+ "loss": 2.0521,
+ "step": 114
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019992141406861776,
+ "loss": 1.9441,
+ "step": 115
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991991026654918,
+ "loss": 2.1244,
+ "step": 116
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999183922180809,
+ "loss": 1.7937,
+ "step": 117
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.0001999168599234295,
+ "loss": 2.2603,
+ "step": 118
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991531338281332,
+ "loss": 2.1846,
+ "step": 119
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00019991375259645293,
+ "loss": 2.3241,
+ "step": 120
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991217756457085,
+ "loss": 2.0926,
+ "step": 121
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019991058828739165,
+ "loss": 2.0092,
+ "step": 122
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990898476514193,
+ "loss": 1.8076,
+ "step": 123
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990736699805029,
+ "loss": 2.0369,
+ "step": 124
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990573498634742,
+ "loss": 2.0488,
+ "step": 125
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.000199904088730266,
+ "loss": 2.1534,
+ "step": 126
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990242823004074,
+ "loss": 2.1406,
+ "step": 127
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019990075348590839,
+ "loss": 1.9379,
+ "step": 128
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.00019989906449810775,
+ "loss": 1.9781,
+ "step": 129
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989736126687963,
+ "loss": 1.973,
+ "step": 130
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989564379246683,
+ "loss": 1.6825,
+ "step": 131
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989391207511428,
+ "loss": 2.0843,
+ "step": 132
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989216611506887,
+ "loss": 1.8547,
+ "step": 133
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019989040591257952,
+ "loss": 1.7626,
+ "step": 134
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.0001998886314678972,
+ "loss": 2.0531,
+ "step": 135
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988684278127497,
+ "loss": 2.0031,
+ "step": 136
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 0.00019988503985296773,
+ "loss": 1.9342,
+ "step": 137
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988322268323268,
+ "loss": 2.3297,
+ "step": 138
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019988139127232878,
+ "loss": 2.3401,
+ "step": 139
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987954562051725,
+ "loss": 1.8983,
+ "step": 140
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.0001998776857280612,
+ "loss": 2.0621,
+ "step": 141
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987581159522578,
+ "loss": 2.0574,
+ "step": 142
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987392322227824,
+ "loss": 1.9516,
+ "step": 143
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987202060948783,
+ "loss": 2.1402,
+ "step": 144
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.00019987010375712577,
+ "loss": 1.8903,
+ "step": 145
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986817266546539,
+ "loss": 1.8248,
+ "step": 146
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986622733478204,
+ "loss": 1.9877,
+ "step": 147
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986426776535306,
+ "loss": 1.6272,
+ "step": 148
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986229395745785,
+ "loss": 1.8605,
+ "step": 149
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019986030591137783,
+ "loss": 1.6848,
+ "step": 150
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985830362739647,
+ "loss": 2.1922,
+ "step": 151
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998562871057992,
+ "loss": 2.0238,
+ "step": 152
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.0001998542563468736,
+ "loss": 2.2246,
+ "step": 153
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 0.00019985221135090914,
+ "loss": 1.9438,
+ "step": 154
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019985015211819744,
+ "loss": 2.2136,
+ "step": 155
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998480786490321,
+ "loss": 2.4563,
+ "step": 156
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984599094370874,
+ "loss": 2.2138,
+ "step": 157
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984388900252503,
+ "loss": 2.2679,
+ "step": 158
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019984177282578064,
+ "loss": 1.9537,
+ "step": 159
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998396424137773,
+ "loss": 2.0803,
+ "step": 160
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.0001998374977668188,
+ "loss": 2.0282,
+ "step": 161
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 0.00019983533888521087,
+ "loss": 2.0157,
+ "step": 162
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998331657692613,
+ "loss": 1.7837,
+ "step": 163
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019983097841928,
+ "loss": 2.1556,
+ "step": 164
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982877683557879,
+ "loss": 2.1447,
+ "step": 165
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982656101847162,
+ "loss": 2.4139,
+ "step": 166
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998243309682743,
+ "loss": 1.6788,
+ "step": 167
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019982208668530493,
+ "loss": 1.9008,
+ "step": 168
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.0001998198281698834,
+ "loss": 2.173,
+ "step": 169
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 0.00019981755542233177,
+ "loss": 2.1837,
+ "step": 170
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981526844297404,
+ "loss": 2.0639,
+ "step": 171
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981296723213632,
+ "loss": 2.3864,
+ "step": 172
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019981065179014673,
+ "loss": 1.923,
+ "step": 173
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980832211733535,
+ "loss": 1.9192,
+ "step": 174
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019980597821403438,
+ "loss": 2.0335,
+ "step": 175
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.000199803620080578,
+ "loss": 1.8172,
+ "step": 176
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.0001998012477173024,
+ "loss": 2.0294,
+ "step": 177
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979886112454586,
+ "loss": 2.2889,
+ "step": 178
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 0.00019979646030264867,
+ "loss": 1.8498,
+ "step": 179
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997940452519531,
+ "loss": 2.0797,
+ "step": 180
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997916159728035,
+ "loss": 2.2356,
+ "step": 181
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997891724655462,
+ "loss": 2.1187,
+ "step": 182
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978671473052964,
+ "loss": 1.9301,
+ "step": 183
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019978424276810423,
+ "loss": 1.8582,
+ "step": 184
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.0001997817565786224,
+ "loss": 2.144,
+ "step": 185
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977925616243862,
+ "loss": 2.0595,
+ "step": 186
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 0.00019977674151990945,
+ "loss": 1.9104,
+ "step": 187
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977421265139332,
+ "loss": 1.9727,
+ "step": 188
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019977166955725088,
+ "loss": 1.8727,
+ "step": 189
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997691122378447,
+ "loss": 2.0611,
+ "step": 190
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997665406935394,
+ "loss": 2.0745,
+ "step": 191
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.0001997639549247016,
+ "loss": 1.9974,
+ "step": 192
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019976135493169996,
+ "loss": 1.9856,
+ "step": 193
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975874071490526,
+ "loss": 1.778,
+ "step": 194
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 0.00019975611227469016,
+ "loss": 1.8347,
+ "step": 195
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997534696114294,
+ "loss": 1.5555,
+ "step": 196
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019975081272549989,
+ "loss": 1.5625,
+ "step": 197
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974814161728032,
+ "loss": 1.9997,
+ "step": 198
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974545628715157,
+ "loss": 1.9523,
+ "step": 199
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974275673549654,
+ "loss": 2.1557,
+ "step": 200
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019974004296270006,
+ "loss": 1.8306,
+ "step": 201
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973731496914914,
+ "loss": 2.0051,
+ "step": 202
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.00019973457275523264,
+ "loss": 2.201,
+ "step": 203
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 0.0001997318163213416,
+ "loss": 2.2446,
+ "step": 204
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972904566786903,
+ "loss": 2.1172,
+ "step": 205
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972626079520995,
+ "loss": 1.9849,
+ "step": 206
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019972346170376142,
+ "loss": 1.9774,
+ "step": 207
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.0001997206483939225,
+ "loss": 1.7625,
+ "step": 208
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971782086609436,
+ "loss": 2.2346,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "eval_loss": 2.00066876411438,
+ "eval_runtime": 282.7648,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 209
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971497912068013,
+ "loss": 2.4185,
+ "step": 210
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019971212315808497,
+ "loss": 1.946,
+ "step": 211
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 0.00019970925297871605,
+ "loss": 2.0049,
+ "step": 212
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970636858298267,
+ "loss": 1.9545,
+ "step": 213
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970346997129598,
+ "loss": 1.9636,
+ "step": 214
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019970055714406938,
+ "loss": 1.9068,
+ "step": 215
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969763010171807,
+ "loss": 1.5749,
+ "step": 216
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019969468884465942,
+ "loss": 1.7676,
+ "step": 217
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996917333733128,
+ "loss": 2.0329,
+ "step": 218
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.0001996887636880996,
+ "loss": 1.9307,
+ "step": 219
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 0.00019968577978944323,
+ "loss": 2.134,
+ "step": 220
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019968278167776908,
+ "loss": 2.0911,
+ "step": 221
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967976935350467,
+ "loss": 2.5057,
+ "step": 222
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.0001996767428170795,
+ "loss": 1.9267,
+ "step": 223
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967370206892503,
+ "loss": 2.3569,
+ "step": 224
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019967064710947488,
+ "loss": 1.992,
+ "step": 225
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966757793916454,
+ "loss": 2.01,
+ "step": 226
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966449455843165,
+ "loss": 1.8037,
+ "step": 227
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019966139696771587,
+ "loss": 2.2498,
+ "step": 228
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 0.00019965828516745876,
+ "loss": 1.6563,
+ "step": 229
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996551591581041,
+ "loss": 1.979,
+ "step": 230
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996520189400975,
+ "loss": 2.1553,
+ "step": 231
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996488645138867,
+ "loss": 1.8743,
+ "step": 232
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964569587992148,
+ "loss": 2.1907,
+ "step": 233
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019964251303865362,
+ "loss": 2.0644,
+ "step": 234
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.00019963931599053692,
+ "loss": 2.1721,
+ "step": 235
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996361047360272,
+ "loss": 2.2267,
+ "step": 236
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 0.0001996328792755823,
+ "loss": 1.9445,
+ "step": 237
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019962963960966213,
+ "loss": 2.2003,
+ "step": 238
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996263857387286,
+ "loss": 2.3114,
+ "step": 239
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996231176632456,
+ "loss": 1.8553,
+ "step": 240
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961983538367914,
+ "loss": 2.1349,
+ "step": 241
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019961653890049715,
+ "loss": 1.8784,
+ "step": 242
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.0001996132282141697,
+ "loss": 2.0118,
+ "step": 243
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960990332516874,
+ "loss": 1.9938,
+ "step": 244
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 0.00019960656423396834,
+ "loss": 2.2582,
+ "step": 245
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019960321094104465,
+ "loss": 2.1807,
+ "step": 246
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959984344687578,
+ "loss": 1.9084,
+ "step": 247
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019959646175194174,
+ "loss": 2.2879,
+ "step": 248
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995930658567248,
+ "loss": 1.942,
+ "step": 249
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958965576170908,
+ "loss": 2.1313,
+ "step": 250
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019958623146738088,
+ "loss": 2.3202,
+ "step": 251
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0001995827929742283,
+ "loss": 1.7832,
+ "step": 252
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957934028274162,
+ "loss": 1.7103,
+ "step": 253
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00019957587339341321,
+ "loss": 1.9912,
+ "step": 254
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995723923067373,
+ "loss": 1.6686,
+ "step": 255
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956889702321023,
+ "loss": 1.966,
+ "step": 256
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956538754333034,
+ "loss": 2.2287,
+ "step": 257
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019956186386759804,
+ "loss": 1.4866,
+ "step": 258
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995583259965157,
+ "loss": 1.9599,
+ "step": 259
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019955477393058773,
+ "loss": 1.9273,
+ "step": 260
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.0001995512076703206,
+ "loss": 1.847,
+ "step": 261
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 0.00019954762721622279,
+ "loss": 2.0535,
+ "step": 262
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995440325688048,
+ "loss": 2.4403,
+ "step": 263
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019954042372857908,
+ "loss": 1.8712,
+ "step": 264
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953680069606026,
+ "loss": 2.1837,
+ "step": 265
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019953316347176488,
+ "loss": 2.0398,
+ "step": 266
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995295120562115,
+ "loss": 2.1135,
+ "step": 267
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952584644992075,
+ "loss": 2.0358,
+ "step": 268
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.00019952216665341526,
+ "loss": 2.3282,
+ "step": 269
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 0.0001995184726672197,
+ "loss": 1.9741,
+ "step": 270
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951476449186074,
+ "loss": 1.7523,
+ "step": 271
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019951104212786712,
+ "loss": 2.1509,
+ "step": 272
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001995073055757695,
+ "loss": 2.0865,
+ "step": 273
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019950355483610067,
+ "loss": 1.8972,
+ "step": 274
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.00019949978990939542,
+ "loss": 2.4693,
+ "step": 275
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994960107961905,
+ "loss": 1.9307,
+ "step": 276
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994922174970248,
+ "loss": 2.0097,
+ "step": 277
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994884100124391,
+ "loss": 1.6561,
+ "step": 278
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 0.0001994845883429763,
+ "loss": 2.3069,
+ "step": 279
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019948075248918124,
+ "loss": 2.0134,
+ "step": 280
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947690245160091,
+ "loss": 2.1061,
+ "step": 281
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019947303823078416,
+ "loss": 2.0855,
+ "step": 282
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946915982728197,
+ "loss": 1.5672,
+ "step": 283
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.0001994652672416473,
+ "loss": 1.7289,
+ "step": 284
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019946136047443522,
+ "loss": 1.9013,
+ "step": 285
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945743952620268,
+ "loss": 2.3105,
+ "step": 286
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 0.00019945350439750872,
+ "loss": 2.341,
+ "step": 287
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944955508891443,
+ "loss": 1.88,
+ "step": 288
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994455916009829,
+ "loss": 1.913,
+ "step": 289
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019944161393427922,
+ "loss": 1.9513,
+ "step": 290
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943762208937053,
+ "loss": 2.3331,
+ "step": 291
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019943361606682597,
+ "loss": 2.3024,
+ "step": 292
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942959586721672,
+ "loss": 2.2222,
+ "step": 293
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.00019942556149111598,
+ "loss": 2.1003,
+ "step": 294
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 0.0001994215129390989,
+ "loss": 1.9038,
+ "step": 295
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941745021174282,
+ "loss": 1.6068,
+ "step": 296
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019941337330962693,
+ "loss": 1.8894,
+ "step": 297
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940928223333252,
+ "loss": 2.3158,
+ "step": 298
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.0001994051769834429,
+ "loss": 2.1015,
+ "step": 299
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019940105756054337,
+ "loss": 2.1519,
+ "step": 300
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939692396522127,
+ "loss": 1.7233,
+ "step": 301
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019939277619806598,
+ "loss": 1.85,
+ "step": 302
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938861425966887,
+ "loss": 2.2368,
+ "step": 303
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 0.00019938443815062335,
+ "loss": 1.765,
+ "step": 304
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993802478715248,
+ "loss": 1.6333,
+ "step": 305
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937604342297073,
+ "loss": 2.191,
+ "step": 306
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019937182480556055,
+ "loss": 2.2402,
+ "step": 307
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019936759201989577,
+ "loss": 2.0568,
+ "step": 308
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.0001993633450665799,
+ "loss": 2.4314,
+ "step": 309
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935908394621844,
+ "loss": 2.0556,
+ "step": 310
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935480865941894,
+ "loss": 2.0988,
+ "step": 311
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 0.00019935051920679094,
+ "loss": 2.0964,
+ "step": 312
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019934621558894607,
+ "loss": 1.9365,
+ "step": 313
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993418978064979,
+ "loss": 1.6224,
+ "step": 314
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933756586006202,
+ "loss": 2.144,
+ "step": 315
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019933321975025616,
+ "loss": 2.2899,
+ "step": 316
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019932885947769992,
+ "loss": 1.8865,
+ "step": 317
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.000199324485043015,
+ "loss": 2.3996,
+ "step": 318
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.0001993200964468251,
+ "loss": 1.3858,
+ "step": 319
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 0.00019931569368975588,
+ "loss": 2.2231,
+ "step": 320
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019931127677243516,
+ "loss": 2.0537,
+ "step": 321
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930684569549264,
+ "loss": 2.1381,
+ "step": 322
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019930240045956012,
+ "loss": 2.0152,
+ "step": 323
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992979410652714,
+ "loss": 2.0293,
+ "step": 324
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019929346751326228,
+ "loss": 1.7457,
+ "step": 325
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928897980417057,
+ "loss": 1.987,
+ "step": 326
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019928447793863616,
+ "loss": 2.2451,
+ "step": 327
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.00019927996191730093,
+ "loss": 2.3312,
+ "step": 328
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 0.0001992754317408087,
+ "loss": 1.8771,
+ "step": 329
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992708874098054,
+ "loss": 1.833,
+ "step": 330
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926632892493896,
+ "loss": 1.9343,
+ "step": 331
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019926175628685937,
+ "loss": 2.2328,
+ "step": 332
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992571694962185,
+ "loss": 1.9916,
+ "step": 333
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992525685536704,
+ "loss": 1.9497,
+ "step": 334
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.000199247953459871,
+ "loss": 2.029,
+ "step": 335
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.00019924332421547835,
+ "loss": 2.0326,
+ "step": 336
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0001992386808211525,
+ "loss": 2.6406,
+ "step": 337
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019923402327755546,
+ "loss": 2.3811,
+ "step": 338
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922935158535129,
+ "loss": 1.6143,
+ "step": 339
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019922466574520608,
+ "loss": 2.2182,
+ "step": 340
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921996575778794,
+ "loss": 2.218,
+ "step": 341
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.000199215251623767,
+ "loss": 1.8615,
+ "step": 342
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019921052334381534,
+ "loss": 2.165,
+ "step": 343
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.00019920578091860716,
+ "loss": 2.1627,
+ "step": 344
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0001992010243488186,
+ "loss": 2.154,
+ "step": 345
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919625363512786,
+ "loss": 1.5966,
+ "step": 346
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019919146877821512,
+ "loss": 2.0903,
+ "step": 347
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991866697787626,
+ "loss": 2.2322,
+ "step": 348
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019918185663745456,
+ "loss": 1.9319,
+ "step": 349
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917702935497725,
+ "loss": 2.1367,
+ "step": 350
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019917218793201886,
+ "loss": 2.1767,
+ "step": 351
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.00019916733236926976,
+ "loss": 2.1009,
+ "step": 352
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991624626674222,
+ "loss": 2.1286,
+ "step": 353
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0001991575788271705,
+ "loss": 2.181,
+ "step": 354
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019915268084921101,
+ "loss": 2.12,
+ "step": 355
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019914776873424206,
+ "loss": 1.9895,
+ "step": 356
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.000199142842482964,
+ "loss": 1.9285,
+ "step": 357
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991379020960792,
+ "loss": 2.2376,
+ "step": 358
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0001991329475742921,
+ "loss": 2.1274,
+ "step": 359
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912797891830908,
+ "loss": 2.0043,
+ "step": 360
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019912299612883852,
+ "loss": 2.022,
+ "step": 361
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.00019911799920659093,
+ "loss": 1.7343,
+ "step": 362
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991129881522787,
+ "loss": 2.0621,
+ "step": 363
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019910796296661632,
+ "loss": 1.5116,
+ "step": 364
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0001991029236503203,
+ "loss": 2.0485,
+ "step": 365
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909787020410907,
+ "loss": 1.971,
+ "step": 366
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019909280262870324,
+ "loss": 1.9724,
+ "step": 367
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908772092482524,
+ "loss": 1.318,
+ "step": 368
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019908262509319964,
+ "loss": 2.0539,
+ "step": 369
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00019907751513455302,
+ "loss": 2.1097,
+ "step": 370
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019907239104961392,
+ "loss": 2.0632,
+ "step": 371
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906725283911296,
+ "loss": 2.1897,
+ "step": 372
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019906210050378266,
+ "loss": 2.2002,
+ "step": 373
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905693404435773,
+ "loss": 1.9005,
+ "step": 374
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019905175346157474,
+ "loss": 1.9873,
+ "step": 375
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904655875617233,
+ "loss": 1.7215,
+ "step": 376
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019904134992889113,
+ "loss": 2.0434,
+ "step": 377
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903612698047383,
+ "loss": 2.4223,
+ "step": 378
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.00019903088991166513,
+ "loss": 2.0837,
+ "step": 379
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902563872321172,
+ "loss": 2.2389,
+ "step": 380
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019902037341586225,
+ "loss": 1.7205,
+ "step": 381
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001990150939903675,
+ "loss": 1.9577,
+ "step": 382
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019900980044748015,
+ "loss": 1.8778,
+ "step": 383
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.000199004492787955,
+ "loss": 2.2213,
+ "step": 384
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899917101254874,
+ "loss": 2.0927,
+ "step": 385
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.00019899383512202019,
+ "loss": 2.2921,
+ "step": 386
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0001989884851171301,
+ "loss": 2.2983,
+ "step": 387
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989831209986413,
+ "loss": 1.8052,
+ "step": 388
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897774276731857,
+ "loss": 1.7741,
+ "step": 389
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019897235042392873,
+ "loss": 1.779,
+ "step": 390
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896694396924063,
+ "loss": 1.6924,
+ "step": 391
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019896152340402509,
+ "loss": 2.036,
+ "step": 392
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019895608872905494,
+ "loss": 2.04,
+ "step": 393
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0001989506399451051,
+ "loss": 2.1702,
+ "step": 394
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.00019894517705295245,
+ "loss": 1.9429,
+ "step": 395
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893970005337584,
+ "loss": 2.0528,
+ "step": 396
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019893420894715618,
+ "loss": 1.7906,
+ "step": 397
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989287037350764,
+ "loss": 2.3494,
+ "step": 398
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019892318441792138,
+ "loss": 1.7415,
+ "step": 399
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989176509964781,
+ "loss": 2.0184,
+ "step": 400
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989121034715355,
+ "loss": 1.9277,
+ "step": 401
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0001989065418438845,
+ "loss": 2.2168,
+ "step": 402
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019890096611431814,
+ "loss": 2.6114,
+ "step": 403
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.00019889537628363133,
+ "loss": 2.0713,
+ "step": 404
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888977235262104,
+ "loss": 2.2966,
+ "step": 405
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019888415432208636,
+ "loss": 2.5206,
+ "step": 406
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887852219282822,
+ "loss": 2.4503,
+ "step": 407
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019887287596564966,
+ "loss": 2.102,
+ "step": 408
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886721564135572,
+ "loss": 2.3275,
+ "step": 409
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019886154122075343,
+ "loss": 2.0481,
+ "step": 410
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885585270465182,
+ "loss": 1.8395,
+ "step": 411
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.00019885015009386202,
+ "loss": 2.3535,
+ "step": 412
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.000198844433389197,
+ "loss": 2.0147,
+ "step": 413
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988387025914719,
+ "loss": 2.1919,
+ "step": 414
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988329577015038,
+ "loss": 2.156,
+ "step": 415
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882719872011176,
+ "loss": 2.2672,
+ "step": 416
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019882142564811694,
+ "loss": 2.3242,
+ "step": 417
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0001988156384863424,
+ "loss": 2.0259,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "eval_loss": 1.9941134452819824,
+ "eval_runtime": 282.533,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 418
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880983723561332,
+ "loss": 1.7039,
+ "step": 419
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.00019880402189675678,
+ "loss": 2.1007,
+ "step": 420
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879819247060193,
+ "loss": 2.2297,
+ "step": 421
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019879234895797996,
+ "loss": 1.6166,
+ "step": 422
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.000198786491359724,
+ "loss": 2.408,
+ "step": 423
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019878061967666915,
+ "loss": 1.686,
+ "step": 424
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0001987747339096527,
+ "loss": 2.0492,
+ "step": 425
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876883405951377,
+ "loss": 2.2179,
+ "step": 426
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019876292012709356,
+ "loss": 1.8812,
+ "step": 427
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.00019875699211323528,
+ "loss": 2.2888,
+ "step": 428
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019875105001878409,
+ "loss": 2.0561,
+ "step": 429
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019874509384458725,
+ "loss": 1.9299,
+ "step": 430
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873912359149397,
+ "loss": 2.1999,
+ "step": 431
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019873313926035548,
+ "loss": 1.8509,
+ "step": 432
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019872714085202503,
+ "loss": 1.8281,
+ "step": 433
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987211283673578,
+ "loss": 1.8359,
+ "step": 434
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0001987151018072111,
+ "loss": 2.2844,
+ "step": 435
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870906117244416,
+ "loss": 1.9397,
+ "step": 436
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.00019870300646391824,
+ "loss": 2.302,
+ "step": 437
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869693768249661,
+ "loss": 2.1176,
+ "step": 438
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019869085482904458,
+ "loss": 2.1909,
+ "step": 439
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986847579044294,
+ "loss": 2.2382,
+ "step": 440
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867864690952035,
+ "loss": 2.0988,
+ "step": 441
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019867252184518878,
+ "loss": 2.2136,
+ "step": 442
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0001986663827123079,
+ "loss": 1.9324,
+ "step": 443
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019866022951175308,
+ "loss": 2.1274,
+ "step": 444
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.00019865406224440165,
+ "loss": 1.8625,
+ "step": 445
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019864788091113287,
+ "loss": 2.0009,
+ "step": 446
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0001986416855128281,
+ "loss": 2.2245,
+ "step": 447
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019863547605037063,
+ "loss": 2.0654,
+ "step": 448
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862925252464586,
+ "loss": 1.4339,
+ "step": 449
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019862301493654108,
+ "loss": 2.1347,
+ "step": 450
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861676328694562,
+ "loss": 1.7029,
+ "step": 451
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019861049757675088,
+ "loss": 2.0081,
+ "step": 452
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.00019860421780685018,
+ "loss": 1.9994,
+ "step": 453
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985979239781389,
+ "loss": 1.9325,
+ "step": 454
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019859161609151436,
+ "loss": 1.8502,
+ "step": 455
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.000198585294147876,
+ "loss": 2.3779,
+ "step": 456
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019857895814812509,
+ "loss": 2.0303,
+ "step": 457
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985726080931651,
+ "loss": 1.9898,
+ "step": 458
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019856624398390137,
+ "loss": 1.7648,
+ "step": 459
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019855986582124126,
+ "loss": 1.7822,
+ "step": 460
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0001985534736060942,
+ "loss": 1.9219,
+ "step": 461
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.00019854706733937155,
+ "loss": 2.1789,
+ "step": 462
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019854064702198675,
+ "loss": 1.9091,
+ "step": 463
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019853421265485514,
+ "loss": 1.9941,
+ "step": 464
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985277642388941,
+ "loss": 1.904,
+ "step": 465
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019852130177502316,
+ "loss": 1.6299,
+ "step": 466
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985148252641636,
+ "loss": 1.7712,
+ "step": 467
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019850833470723886,
+ "loss": 1.6825,
+ "step": 468
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0001985018301051744,
+ "loss": 1.7408,
+ "step": 469
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.00019849531145889758,
+ "loss": 2.0622,
+ "step": 470
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019848877876933784,
+ "loss": 1.5699,
+ "step": 471
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0001984822320374266,
+ "loss": 2.0253,
+ "step": 472
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019847567126409724,
+ "loss": 2.2186,
+ "step": 473
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019846909645028523,
+ "loss": 2.0872,
+ "step": 474
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.000198462507596928,
+ "loss": 1.9362,
+ "step": 475
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019845590470496497,
+ "loss": 2.4109,
+ "step": 476
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844928777533753,
+ "loss": 2.2626,
+ "step": 477
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.00019844265680898918,
+ "loss": 2.0874,
+ "step": 478
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984360118068653,
+ "loss": 2.1606,
+ "step": 479
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984293527699133,
+ "loss": 2.063,
+ "step": 480
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019842267969908265,
+ "loss": 1.9065,
+ "step": 481
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001984159925953248,
+ "loss": 1.9511,
+ "step": 482
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840929145959317,
+ "loss": 2.056,
+ "step": 483
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019840257629284317,
+ "loss": 2.2353,
+ "step": 484
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019839584709603226,
+ "loss": 1.9401,
+ "step": 485
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0001983891038701199,
+ "loss": 1.9648,
+ "step": 486
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.00019838234661606748,
+ "loss": 1.753,
+ "step": 487
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019837557533483846,
+ "loss": 1.7805,
+ "step": 488
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836879002739827,
+ "loss": 2.192,
+ "step": 489
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019836199069471437,
+ "loss": 1.9112,
+ "step": 490
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019835517733775615,
+ "loss": 2.0119,
+ "step": 491
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0001983483499574951,
+ "loss": 1.8932,
+ "step": 492
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019834150855490464,
+ "loss": 1.5968,
+ "step": 493
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019833465313096017,
+ "loss": 2.1493,
+ "step": 494
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.00019832778368663917,
+ "loss": 1.8863,
+ "step": 495
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.000198320900222921,
+ "loss": 2.2134,
+ "step": 496
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019831400274078717,
+ "loss": 2.2831,
+ "step": 497
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019830709124122112,
+ "loss": 2.0266,
+ "step": 498
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001983001657252082,
+ "loss": 2.3392,
+ "step": 499
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019829322619373588,
+ "loss": 1.8426,
+ "step": 500
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019828627264779363,
+ "loss": 2.0742,
+ "step": 501
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0001982793050883728,
+ "loss": 1.9578,
+ "step": 502
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.00019827232351646686,
+ "loss": 2.0863,
+ "step": 503
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982653279330712,
+ "loss": 2.2881,
+ "step": 504
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019825831833918323,
+ "loss": 1.8869,
+ "step": 505
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982512947358024,
+ "loss": 1.8997,
+ "step": 506
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019824425712393012,
+ "loss": 1.8945,
+ "step": 507
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019823720550456977,
+ "loss": 1.9496,
+ "step": 508
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0001982301398787268,
+ "loss": 2.1066,
+ "step": 509
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019822306024740852,
+ "loss": 1.958,
+ "step": 510
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019821596661162447,
+ "loss": 2.1112,
+ "step": 511
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.00019820885897238596,
+ "loss": 2.1012,
+ "step": 512
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001982017373307064,
+ "loss": 2.2623,
+ "step": 513
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019819460168760117,
+ "loss": 2.5058,
+ "step": 514
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981874520440877,
+ "loss": 2.1367,
+ "step": 515
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019818028840118532,
+ "loss": 2.2743,
+ "step": 516
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019817311075991543,
+ "loss": 1.5517,
+ "step": 517
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0001981659191213014,
+ "loss": 1.9569,
+ "step": 518
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815871348636863,
+ "loss": 2.0566,
+ "step": 519
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.00019815149385614444,
+ "loss": 1.8859,
+ "step": 520
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019814426023165825,
+ "loss": 2.0298,
+ "step": 521
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019813701261394136,
+ "loss": 2.0614,
+ "step": 522
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812975100402715,
+ "loss": 2.221,
+ "step": 523
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019812247540295096,
+ "loss": 2.1255,
+ "step": 524
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019811518581175014,
+ "loss": 2.1885,
+ "step": 525
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.000198107882231464,
+ "loss": 2.3918,
+ "step": 526
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019810056466313392,
+ "loss": 2.2759,
+ "step": 527
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.00019809323310780318,
+ "loss": 1.9727,
+ "step": 528
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980858875665171,
+ "loss": 2.0417,
+ "step": 529
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019807852804032305,
+ "loss": 1.645,
+ "step": 530
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980711545302703,
+ "loss": 1.7943,
+ "step": 531
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019806376703741015,
+ "loss": 1.8844,
+ "step": 532
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019805636556279588,
+ "loss": 2.1128,
+ "step": 533
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0001980489501074828,
+ "loss": 2.0272,
+ "step": 534
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019804152067252816,
+ "loss": 2.0916,
+ "step": 535
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019803407725899131,
+ "loss": 1.7287,
+ "step": 536
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.00019802661986793342,
+ "loss": 2.0667,
+ "step": 537
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801914850041784,
+ "loss": 2.4016,
+ "step": 538
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019801166315750978,
+ "loss": 1.8557,
+ "step": 539
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0001980041638402765,
+ "loss": 1.8072,
+ "step": 540
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019799665054978722,
+ "loss": 2.2252,
+ "step": 541
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798912328711322,
+ "loss": 2.1377,
+ "step": 542
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019798158205332764,
+ "loss": 2.0306,
+ "step": 543
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019797402684950576,
+ "loss": 1.7428,
+ "step": 544
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.00019796645767672477,
+ "loss": 2.0843,
+ "step": 545
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795887453606388,
+ "loss": 1.9175,
+ "step": 546
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019795127742860423,
+ "loss": 1.6673,
+ "step": 547
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0001979436663554291,
+ "loss": 1.5553,
+ "step": 548
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019793604131762357,
+ "loss": 1.604,
+ "step": 549
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792840231627482,
+ "loss": 2.023,
+ "step": 550
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019792074935247206,
+ "loss": 1.8399,
+ "step": 551
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019791308242730638,
+ "loss": 1.8579,
+ "step": 552
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.00019790540154187094,
+ "loss": 2.2135,
+ "step": 553
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019789770669726087,
+ "loss": 1.7894,
+ "step": 554
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788999789457326,
+ "loss": 2.1723,
+ "step": 555
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019788227513490723,
+ "loss": 2.0881,
+ "step": 556
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019787453841936393,
+ "loss": 1.7181,
+ "step": 557
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019786678774904638,
+ "loss": 1.8725,
+ "step": 558
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785902312505964,
+ "loss": 2.0544,
+ "step": 559
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019785124454851084,
+ "loss": 1.7503,
+ "step": 560
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.000197843452020509,
+ "loss": 2.01,
+ "step": 561
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.00019783564554216518,
+ "loss": 1.748,
+ "step": 562
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978278251145924,
+ "loss": 2.0866,
+ "step": 563
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001978199907389057,
+ "loss": 1.6046,
+ "step": 564
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019781214241622208,
+ "loss": 1.9222,
+ "step": 565
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019780428014766051,
+ "loss": 2.2003,
+ "step": 566
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019779640393434206,
+ "loss": 2.0534,
+ "step": 567
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0001977885137773896,
+ "loss": 1.8609,
+ "step": 568
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019778060967792817,
+ "loss": 2.0666,
+ "step": 569
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.00019777269163708468,
+ "loss": 1.9512,
+ "step": 570
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019776475965598814,
+ "loss": 1.8349,
+ "step": 571
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977568137357694,
+ "loss": 2.0507,
+ "step": 572
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019774885387756138,
+ "loss": 1.7588,
+ "step": 573
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.000197740880082499,
+ "loss": 2.0981,
+ "step": 574
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019773289235171918,
+ "loss": 2.0953,
+ "step": 575
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019772489068636077,
+ "loss": 2.0678,
+ "step": 576
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.00019771687508756466,
+ "loss": 2.0136,
+ "step": 577
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0001977088455564736,
+ "loss": 1.9781,
+ "step": 578
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019770080209423254,
+ "loss": 2.2185,
+ "step": 579
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019769274470198827,
+ "loss": 1.8076,
+ "step": 580
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019768467338088957,
+ "loss": 1.6888,
+ "step": 581
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019767658813208726,
+ "loss": 2.1273,
+ "step": 582
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0001976684889567341,
+ "loss": 2.3232,
+ "step": 583
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019766037585598487,
+ "loss": 2.366,
+ "step": 584
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019765224883099635,
+ "loss": 1.8939,
+ "step": 585
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019764410788292722,
+ "loss": 2.0162,
+ "step": 586
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.00019763595301293822,
+ "loss": 2.2752,
+ "step": 587
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976277842221921,
+ "loss": 1.9461,
+ "step": 588
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976196015118535,
+ "loss": 1.9999,
+ "step": 589
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001976114048830891,
+ "loss": 2.0169,
+ "step": 590
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019760319433706757,
+ "loss": 2.1838,
+ "step": 591
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019759496987495955,
+ "loss": 2.3513,
+ "step": 592
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975867314979377,
+ "loss": 1.9915,
+ "step": 593
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0001975784792071766,
+ "loss": 2.1973,
+ "step": 594
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.00019757021300385286,
+ "loss": 2.3112,
+ "step": 595
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019756193288914507,
+ "loss": 2.0992,
+ "step": 596
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019755363886423376,
+ "loss": 2.4266,
+ "step": 597
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019754533093030148,
+ "loss": 1.7649,
+ "step": 598
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975370090885328,
+ "loss": 1.7573,
+ "step": 599
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019752867334011423,
+ "loss": 1.7949,
+ "step": 600
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0001975203236862342,
+ "loss": 2.0229,
+ "step": 601
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019751196012808325,
+ "loss": 2.0519,
+ "step": 602
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.00019750358266685383,
+ "loss": 2.0829,
+ "step": 603
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019749519130374038,
+ "loss": 2.0153,
+ "step": 604
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019748678603993933,
+ "loss": 1.8594,
+ "step": 605
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019747836687664908,
+ "loss": 2.1385,
+ "step": 606
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746993381507003,
+ "loss": 2.1317,
+ "step": 607
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019746148685640451,
+ "loss": 1.1676,
+ "step": 608
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974530260018569,
+ "loss": 2.2856,
+ "step": 609
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0001974445512526336,
+ "loss": 2.1973,
+ "step": 610
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019743606260994278,
+ "loss": 1.6912,
+ "step": 611
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.00019742756007499486,
+ "loss": 1.8091,
+ "step": 612
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741904364900208,
+ "loss": 2.0108,
+ "step": 613
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019741051333317867,
+ "loss": 2.1061,
+ "step": 614
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019740196912874087,
+ "loss": 1.8934,
+ "step": 615
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019739341103690693,
+ "loss": 1.8599,
+ "step": 616
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019738483905889703,
+ "loss": 2.0025,
+ "step": 617
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019737625319593335,
+ "loss": 1.8247,
+ "step": 618
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019736765344924005,
+ "loss": 2.222,
+ "step": 619
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00019735903982004324,
+ "loss": 2.116,
+ "step": 620
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973504123095711,
+ "loss": 1.9183,
+ "step": 621
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973417709190536,
+ "loss": 2.1507,
+ "step": 622
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019733311564972296,
+ "loss": 1.7899,
+ "step": 623
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019732444650281315,
+ "loss": 2.1005,
+ "step": 624
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.0001973157634795602,
+ "loss": 2.2391,
+ "step": 625
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019730706658120214,
+ "loss": 1.9466,
+ "step": 626
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.000197298355808979,
+ "loss": 1.9854,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "eval_loss": 1.9957869052886963,
+ "eval_runtime": 282.5544,
+ "eval_samples_per_second": 0.729,
+ "eval_steps_per_second": 0.729,
+ "step": 627
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 0.00019728963116413266,
+ "loss": 2.1877,
+ "step": 628
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019728089264790712,
+ "loss": 2.2194,
+ "step": 629
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019727214026154827,
+ "loss": 1.9631,
+ "step": 630
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019726337400630405,
+ "loss": 2.3506,
+ "step": 631
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019725459388342432,
+ "loss": 2.0543,
+ "step": 632
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972457998941609,
+ "loss": 2.0402,
+ "step": 633
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019723699203976766,
+ "loss": 1.9316,
+ "step": 634
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.0001972281703215004,
+ "loss": 2.2024,
+ "step": 635
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721933474061692,
+ "loss": 1.6776,
+ "step": 636
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 0.00019721048529837694,
+ "loss": 1.9757,
+ "step": 637
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019720162199604222,
+ "loss": 1.7631,
+ "step": 638
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019719274483487648,
+ "loss": 2.34,
+ "step": 639
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.0001971838538161454,
+ "loss": 1.8469,
+ "step": 640
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019717494894111662,
+ "loss": 2.3151,
+ "step": 641
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019716603021105987,
+ "loss": 2.0661,
+ "step": 642
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019715709762724667,
+ "loss": 2.0408,
+ "step": 643
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019714815119095062,
+ "loss": 1.9848,
+ "step": 644
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 0.00019713919090344736,
+ "loss": 2.3134,
+ "step": 645
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019713021676601438,
+ "loss": 2.4947,
+ "step": 646
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001971212287799312,
+ "loss": 2.0515,
+ "step": 647
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019711222694647932,
+ "loss": 2.6216,
+ "step": 648
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019710321126694216,
+ "loss": 1.6517,
+ "step": 649
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.0001970941817426052,
+ "loss": 2.0408,
+ "step": 650
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019708513837475588,
+ "loss": 1.8841,
+ "step": 651
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019707608116468356,
+ "loss": 2.1966,
+ "step": 652
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 0.00019706701011367955,
+ "loss": 1.7587,
+ "step": 653
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970579252230373,
+ "loss": 2.2196,
+ "step": 654
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019704882649405198,
+ "loss": 1.8146,
+ "step": 655
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703971392802098,
+ "loss": 2.2932,
+ "step": 656
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019703058752624353,
+ "loss": 1.923,
+ "step": 657
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.0001970214472900208,
+ "loss": 2.2393,
+ "step": 658
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019701229322065605,
+ "loss": 1.7338,
+ "step": 659
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019700312531945442,
+ "loss": 1.7859,
+ "step": 660
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019699394358772306,
+ "loss": 2.2719,
+ "step": 661
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 0.00019698474802677107,
+ "loss": 1.576,
+ "step": 662
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019697553863790956,
+ "loss": 2.3333,
+ "step": 663
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019696631542245156,
+ "loss": 2.3508,
+ "step": 664
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019695707838171216,
+ "loss": 2.1876,
+ "step": 665
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019694782751700828,
+ "loss": 1.4863,
+ "step": 666
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019693856282965898,
+ "loss": 1.8948,
+ "step": 667
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019692928432098512,
+ "loss": 1.6867,
+ "step": 668
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691999199230963,
+ "loss": 1.7682,
+ "step": 669
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 0.00019691068584495742,
+ "loss": 2.0914,
+ "step": 670
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019690136588025535,
+ "loss": 2.1413,
+ "step": 671
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019689203209953223,
+ "loss": 2.1275,
+ "step": 672
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.0001968826845041188,
+ "loss": 1.9556,
+ "step": 673
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019687332309534792,
+ "loss": 2.2209,
+ "step": 674
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019686394787455424,
+ "loss": 1.9853,
+ "step": 675
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019685455884307454,
+ "loss": 2.0877,
+ "step": 676
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019684515600224743,
+ "loss": 2.1607,
+ "step": 677
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 0.00019683573935341358,
+ "loss": 2.2664,
+ "step": 678
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019682630889791556,
+ "loss": 1.8527,
+ "step": 679
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.000196816864637098,
+ "loss": 1.8417,
+ "step": 680
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019680740657230738,
+ "loss": 1.9853,
+ "step": 681
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019679793470489228,
+ "loss": 1.8419,
+ "step": 682
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019678844903620317,
+ "loss": 1.9971,
+ "step": 683
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019677894956759246,
+ "loss": 1.9843,
+ "step": 684
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019676943630041462,
+ "loss": 2.376,
+ "step": 685
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675990923602598,
+ "loss": 2.1558,
+ "step": 686
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 0.00019675036837578494,
+ "loss": 1.5752,
+ "step": 687
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001967408137210518,
+ "loss": 1.6704,
+ "step": 688
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019673124527318881,
+ "loss": 2.1389,
+ "step": 689
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019672166303356028,
+ "loss": 2.126,
+ "step": 690
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019671206700353237,
+ "loss": 1.9402,
+ "step": 691
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019670245718447335,
+ "loss": 1.6701,
+ "step": 692
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019669283357775328,
+ "loss": 1.8134,
+ "step": 693
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.0001966831961847443,
+ "loss": 2.1642,
+ "step": 694
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 0.00019667354500682054,
+ "loss": 1.8455,
+ "step": 695
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.000196663880045358,
+ "loss": 1.9646,
+ "step": 696
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966542013017347,
+ "loss": 1.9855,
+ "step": 697
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019664450877733062,
+ "loss": 1.7029,
+ "step": 698
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019663480247352773,
+ "loss": 1.9789,
+ "step": 699
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001966250823917099,
+ "loss": 1.8751,
+ "step": 700
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019661534853326301,
+ "loss": 2.3644,
+ "step": 701
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.00019660560089957492,
+ "loss": 1.8006,
+ "step": 702
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 0.0001965958394920354,
+ "loss": 2.2799,
+ "step": 703
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019658606431203622,
+ "loss": 1.9258,
+ "step": 704
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965762753609711,
+ "loss": 1.9521,
+ "step": 705
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019656647264023575,
+ "loss": 1.9675,
+ "step": 706
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019655665615122783,
+ "loss": 2.3686,
+ "step": 707
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019654682589534693,
+ "loss": 2.1448,
+ "step": 708
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019653698187399466,
+ "loss": 2.2475,
+ "step": 709
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965271240885745,
+ "loss": 1.9417,
+ "step": 710
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.0001965172525404921,
+ "loss": 2.154,
+ "step": 711
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 0.00019650736723115475,
+ "loss": 2.0646,
+ "step": 712
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019649746816197196,
+ "loss": 2.235,
+ "step": 713
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019648755533435518,
+ "loss": 1.7122,
+ "step": 714
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019647762874971765,
+ "loss": 2.0635,
+ "step": 715
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019646768840947474,
+ "loss": 1.8904,
+ "step": 716
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019645773431504373,
+ "loss": 1.608,
+ "step": 717
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019644776646784388,
+ "loss": 2.2307,
+ "step": 718
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.0001964377848692963,
+ "loss": 2.176,
+ "step": 719
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 0.00019642778952082426,
+ "loss": 2.1984,
+ "step": 720
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001964177804238528,
+ "loss": 2.2625,
+ "step": 721
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019640775757980903,
+ "loss": 2.3142,
+ "step": 722
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019639772099012197,
+ "loss": 2.2366,
+ "step": 723
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019638767065622266,
+ "loss": 1.7823,
+ "step": 724
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.000196377606579544,
+ "loss": 2.0677,
+ "step": 725
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019636752876152095,
+ "loss": 1.3337,
+ "step": 726
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.00019635743720359037,
+ "loss": 2.055,
+ "step": 727
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 0.0001963473319071911,
+ "loss": 1.9888,
+ "step": 728
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019633721287376393,
+ "loss": 1.9258,
+ "step": 729
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019632708010475165,
+ "loss": 2.3768,
+ "step": 730
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001963169336015989,
+ "loss": 1.993,
+ "step": 731
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019630677336575242,
+ "loss": 2.1989,
+ "step": 732
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.0001962965993986608,
+ "loss": 2.1216,
+ "step": 733
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019628641170177464,
+ "loss": 2.2217,
+ "step": 734
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019627621027654648,
+ "loss": 1.8809,
+ "step": 735
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019626599512443077,
+ "loss": 2.0864,
+ "step": 736
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 0.00019625576624688406,
+ "loss": 2.0627,
+ "step": 737
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019624552364536473,
+ "loss": 2.1347,
+ "step": 738
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019623526732133315,
+ "loss": 1.9998,
+ "step": 739
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019622499727625162,
+ "loss": 2.1998,
+ "step": 740
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019621471351158443,
+ "loss": 1.974,
+ "step": 741
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019620441602879787,
+ "loss": 1.9425,
+ "step": 742
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019619410482936008,
+ "loss": 2.6227,
+ "step": 743
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019618377991474124,
+ "loss": 2.1209,
+ "step": 744
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00019617344128641345,
+ "loss": 2.0606,
+ "step": 745
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019616308894585078,
+ "loss": 2.296,
+ "step": 746
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019615272289452923,
+ "loss": 2.0415,
+ "step": 747
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961423431339268,
+ "loss": 1.9516,
+ "step": 748
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961319496655234,
+ "loss": 2.0468,
+ "step": 749
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961215424908009,
+ "loss": 1.877,
+ "step": 750
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.0001961111216112432,
+ "loss": 1.8129,
+ "step": 751
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019610068702833596,
+ "loss": 1.9984,
+ "step": 752
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 0.00019609023874356707,
+ "loss": 1.9013,
+ "step": 753
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019607977675842615,
+ "loss": 2.0546,
+ "step": 754
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019606930107440485,
+ "loss": 2.2817,
+ "step": 755
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960588116929968,
+ "loss": 2.0578,
+ "step": 756
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019604830861569755,
+ "loss": 2.3521,
+ "step": 757
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019603779184400457,
+ "loss": 2.0392,
+ "step": 758
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960272613794174,
+ "loss": 1.9863,
+ "step": 759
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.00019601671722343738,
+ "loss": 2.1889,
+ "step": 760
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001960061593775679,
+ "loss": 2.0908,
+ "step": 761
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 0.0001959955878433143,
+ "loss": 1.986,
+ "step": 762
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019598500262218386,
+ "loss": 2.0339,
+ "step": 763
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019597440371568574,
+ "loss": 2.0958,
+ "step": 764
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.0001959637911253312,
+ "loss": 1.9866,
+ "step": 765
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019595316485263327,
+ "loss": 2.2228,
+ "step": 766
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019594252489910706,
+ "loss": 1.915,
+ "step": 767
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019593187126626965,
+ "loss": 2.0741,
+ "step": 768
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019592120395563994,
+ "loss": 2.5346,
+ "step": 769
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 0.00019591052296873888,
+ "loss": 2.4908,
+ "step": 770
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019589982830708937,
+ "loss": 2.1042,
+ "step": 771
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019588911997221625,
+ "loss": 1.8676,
+ "step": 772
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958783979656462,
+ "loss": 1.9152,
+ "step": 773
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019586766228890806,
+ "loss": 1.7784,
+ "step": 774
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958569129435324,
+ "loss": 2.0784,
+ "step": 775
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.0001958461499310519,
+ "loss": 1.7262,
+ "step": 776
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019583537325300118,
+ "loss": 2.4154,
+ "step": 777
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 0.00019582458291091663,
+ "loss": 2.3185,
+ "step": 778
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019581377890633684,
+ "loss": 2.0981,
+ "step": 779
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019580296124080212,
+ "loss": 1.8952,
+ "step": 780
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019579212991585493,
+ "loss": 1.7208,
+ "step": 781
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019578128493303955,
+ "loss": 2.0209,
+ "step": 782
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019577042629390217,
+ "loss": 2.1867,
+ "step": 783
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957595539999911,
+ "loss": 2.0805,
+ "step": 784
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019574866805285645,
+ "loss": 2.0451,
+ "step": 785
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.00019573776845405028,
+ "loss": 2.2056,
+ "step": 786
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 0.0001957268552051267,
+ "loss": 2.0773,
+ "step": 787
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019571592830764165,
+ "loss": 2.2036,
+ "step": 788
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019570498776315309,
+ "loss": 1.7298,
+ "step": 789
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956940335732209,
+ "loss": 1.8931,
+ "step": 790
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.0001956830657394069,
+ "loss": 2.1567,
+ "step": 791
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019567208426327488,
+ "loss": 1.9471,
+ "step": 792
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019566108914639054,
+ "loss": 1.8916,
+ "step": 793
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019565008039032158,
+ "loss": 2.0111,
+ "step": 794
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 0.00019563905799663752,
+ "loss": 2.1374,
+ "step": 795
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019562802196691003,
+ "loss": 2.3083,
+ "step": 796
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019561697230271254,
+ "loss": 2.0381,
+ "step": 797
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001956059090056205,
+ "loss": 2.1909,
+ "step": 798
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019559483207721133,
+ "loss": 1.9893,
+ "step": 799
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955837415190643,
+ "loss": 2.3178,
+ "step": 800
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.0001955726373327607,
+ "loss": 2.0815,
+ "step": 801
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019556151951988376,
+ "loss": 1.6012,
+ "step": 802
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 0.00019555038808201865,
+ "loss": 1.4965,
+ "step": 803
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019553924302075242,
+ "loss": 2.3069,
+ "step": 804
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019552808433767415,
+ "loss": 2.2388,
+ "step": 805
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019551691203437482,
+ "loss": 2.5662,
+ "step": 806
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019550572611244738,
+ "loss": 1.9419,
+ "step": 807
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019549452657348663,
+ "loss": 2.3638,
+ "step": 808
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019548331341908947,
+ "loss": 2.1567,
+ "step": 809
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019547208665085457,
+ "loss": 1.9697,
+ "step": 810
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.00019546084627038268,
+ "loss": 1.9006,
+ "step": 811
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 0.0001954495922792764,
+ "loss": 2.304,
+ "step": 812
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.0001954383246791403,
+ "loss": 2.0494,
+ "step": 813
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019542704347158093,
+ "loss": 1.8562,
+ "step": 814
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019541574865820672,
+ "loss": 2.1041,
+ "step": 815
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019540444024062804,
+ "loss": 2.22,
+ "step": 816
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019539311822045727,
+ "loss": 1.9925,
+ "step": 817
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019538178259930869,
+ "loss": 2.3213,
+ "step": 818
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019537043337879845,
+ "loss": 2.0319,
+ "step": 819
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 0.00019535907056054475,
+ "loss": 1.8578,
+ "step": 820
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019534769414616764,
+ "loss": 1.4115,
+ "step": 821
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953363041372892,
+ "loss": 2.0731,
+ "step": 822
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019532490053553335,
+ "loss": 2.0605,
+ "step": 823
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019531348334252607,
+ "loss": 1.9044,
+ "step": 824
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001953020525598951,
+ "loss": 1.7405,
+ "step": 825
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.0001952906081892703,
+ "loss": 1.898,
+ "step": 826
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019527915023228332,
+ "loss": 1.9696,
+ "step": 827
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 0.00019526767869056788,
+ "loss": 2.0469,
+ "step": 828
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019525619356575952,
+ "loss": 2.0307,
+ "step": 829
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019524469485949583,
+ "loss": 2.002,
+ "step": 830
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019523318257341622,
+ "loss": 1.9438,
+ "step": 831
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019522165670916207,
+ "loss": 1.535,
+ "step": 832
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001952101172683768,
+ "loss": 1.7505,
+ "step": 833
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019519856425270562,
+ "loss": 2.2248,
+ "step": 834
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019518699766379576,
+ "loss": 2.0669,
+ "step": 835
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019517541750329635,
+ "loss": 2.0268,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 1.9969017505645752,
+ "eval_runtime": 283.3157,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 836
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019516382377285848,
+ "loss": 1.6712,
+ "step": 837
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001951522164741352,
+ "loss": 2.1558,
+ "step": 838
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019514059560878138,
+ "loss": 2.1599,
+ "step": 839
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019512896117845392,
+ "loss": 1.8762,
+ "step": 840
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019511731318481168,
+ "loss": 2.0189,
+ "step": 841
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019510565162951537,
+ "loss": 1.9364,
+ "step": 842
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019509397651422769,
+ "loss": 1.7319,
+ "step": 843
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019508228784061326,
+ "loss": 1.9424,
+ "step": 844
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001950705856103386,
+ "loss": 2.277,
+ "step": 845
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019505886982507225,
+ "loss": 1.6511,
+ "step": 846
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950471404864846,
+ "loss": 1.9056,
+ "step": 847
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019503539759624798,
+ "loss": 1.5105,
+ "step": 848
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0001950236411560367,
+ "loss": 1.9469,
+ "step": 849
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.00019501187116752693,
+ "loss": 1.5012,
+ "step": 850
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019500008763239683,
+ "loss": 1.7086,
+ "step": 851
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019498829055232647,
+ "loss": 1.5586,
+ "step": 852
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019497647992899788,
+ "loss": 1.5573,
+ "step": 853
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.000194964655764095,
+ "loss": 2.0757,
+ "step": 854
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019495281805930367,
+ "loss": 1.5478,
+ "step": 855
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019494096681631172,
+ "loss": 1.7068,
+ "step": 856
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.00019492910203680884,
+ "loss": 1.6759,
+ "step": 857
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 0.0001949172237224867,
+ "loss": 1.4621,
+ "step": 858
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019490533187503892,
+ "loss": 1.5359,
+ "step": 859
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.000194893426496161,
+ "loss": 1.9365,
+ "step": 860
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019488150758755035,
+ "loss": 1.7089,
+ "step": 861
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019486957515090641,
+ "loss": 1.4924,
+ "step": 862
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019485762918793046,
+ "loss": 1.387,
+ "step": 863
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.0001948456697003257,
+ "loss": 1.631,
+ "step": 864
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019483369668979732,
+ "loss": 1.7953,
+ "step": 865
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 0.00019482171015805245,
+ "loss": 1.7552,
+ "step": 866
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019480971010680002,
+ "loss": 1.8313,
+ "step": 867
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019479769653775106,
+ "loss": 1.593,
+ "step": 868
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019478566945261837,
+ "loss": 1.9506,
+ "step": 869
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019477362885311682,
+ "loss": 1.9598,
+ "step": 870
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947615747409631,
+ "loss": 1.7324,
+ "step": 871
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019474950711787585,
+ "loss": 2.1208,
+ "step": 872
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.0001947374259855757,
+ "loss": 1.4111,
+ "step": 873
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019472533134578507,
+ "loss": 1.6696,
+ "step": 874
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00019471322320022849,
+ "loss": 1.6999,
+ "step": 875
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019470110155063225,
+ "loss": 2.1287,
+ "step": 876
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019468896639872468,
+ "loss": 1.874,
+ "step": 877
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019467681774623592,
+ "loss": 1.7149,
+ "step": 878
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019466465559489816,
+ "loss": 1.9563,
+ "step": 879
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019465247994644545,
+ "loss": 1.3504,
+ "step": 880
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019464029080261378,
+ "loss": 1.6176,
+ "step": 881
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019462808816514103,
+ "loss": 1.7577,
+ "step": 882
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 0.00019461587203576706,
+ "loss": 1.8054,
+ "step": 883
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019460364241623358,
+ "loss": 2.0246,
+ "step": 884
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019459139930828428,
+ "loss": 1.7645,
+ "step": 885
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945791427136648,
+ "loss": 1.9225,
+ "step": 886
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019456687263412262,
+ "loss": 1.8967,
+ "step": 887
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945545890714072,
+ "loss": 1.5287,
+ "step": 888
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945422920272699,
+ "loss": 1.5033,
+ "step": 889
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.00019452998150346401,
+ "loss": 2.0148,
+ "step": 890
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 0.0001945176575017448,
+ "loss": 1.3706,
+ "step": 891
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001945053200238693,
+ "loss": 1.7603,
+ "step": 892
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019449296907159667,
+ "loss": 1.9884,
+ "step": 893
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019448060464668783,
+ "loss": 1.6133,
+ "step": 894
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019446822675090565,
+ "loss": 1.7885,
+ "step": 895
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019445583538601498,
+ "loss": 1.8573,
+ "step": 896
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944434305537826,
+ "loss": 1.7241,
+ "step": 897
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944310122559771,
+ "loss": 1.8942,
+ "step": 898
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.0001944185804943691,
+ "loss": 1.7541,
+ "step": 899
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 0.00019440613527073105,
+ "loss": 1.9608,
+ "step": 900
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019439367658683745,
+ "loss": 2.0969,
+ "step": 901
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019438120444446457,
+ "loss": 2.2589,
+ "step": 902
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943687188453907,
+ "loss": 1.7335,
+ "step": 903
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019435621979139596,
+ "loss": 1.8663,
+ "step": 904
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019434370728426252,
+ "loss": 1.5627,
+ "step": 905
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943311813257743,
+ "loss": 1.6101,
+ "step": 906
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.00019431864191771732,
+ "loss": 1.9661,
+ "step": 907
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 0.0001943060890618794,
+ "loss": 1.6487,
+ "step": 908
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019429352276005026,
+ "loss": 2.1282,
+ "step": 909
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019428094301402162,
+ "loss": 1.6944,
+ "step": 910
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019426834982558705,
+ "loss": 1.2433,
+ "step": 911
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019425574319654213,
+ "loss": 1.5735,
+ "step": 912
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019424312312868417,
+ "loss": 1.6499,
+ "step": 913
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019423048962381265,
+ "loss": 1.8366,
+ "step": 914
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019421784268372876,
+ "loss": 1.906,
+ "step": 915
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 0.00019420518231023568,
+ "loss": 1.5976,
+ "step": 916
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941925085051385,
+ "loss": 1.6722,
+ "step": 917
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019417982127024422,
+ "loss": 1.8832,
+ "step": 918
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019416712060736183,
+ "loss": 1.8865,
+ "step": 919
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019415440651830208,
+ "loss": 1.6627,
+ "step": 920
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941416790048778,
+ "loss": 1.3598,
+ "step": 921
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019412893806890357,
+ "loss": 2.0506,
+ "step": 922
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.00019411618371219605,
+ "loss": 1.9794,
+ "step": 923
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001941034159365737,
+ "loss": 1.7851,
+ "step": 924
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001940906347438569,
+ "loss": 1.8312,
+ "step": 925
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019407784013586804,
+ "loss": 1.5167,
+ "step": 926
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019406503211443128,
+ "loss": 1.5725,
+ "step": 927
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019405221068137277,
+ "loss": 1.8857,
+ "step": 928
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019403937583852061,
+ "loss": 1.741,
+ "step": 929
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019402652758770475,
+ "loss": 1.6748,
+ "step": 930
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019401366593075706,
+ "loss": 1.7285,
+ "step": 931
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019400079086951135,
+ "loss": 1.7545,
+ "step": 932
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00019398790240580333,
+ "loss": 1.4491,
+ "step": 933
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019397500054147058,
+ "loss": 1.3359,
+ "step": 934
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019396208527835263,
+ "loss": 1.9567,
+ "step": 935
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.0001939491566182909,
+ "loss": 2.0011,
+ "step": 936
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019393621456312881,
+ "loss": 1.9076,
+ "step": 937
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019392325911471155,
+ "loss": 1.5388,
+ "step": 938
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019391029027488629,
+ "loss": 1.2337,
+ "step": 939
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019389730804550211,
+ "loss": 1.5752,
+ "step": 940
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 0.00019388431242840998,
+ "loss": 1.9131,
+ "step": 941
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019387130342546284,
+ "loss": 1.4177,
+ "step": 942
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019385828103851544,
+ "loss": 1.5865,
+ "step": 943
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001938452452694245,
+ "loss": 1.6335,
+ "step": 944
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019383219612004865,
+ "loss": 1.8599,
+ "step": 945
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019381913359224842,
+ "loss": 1.3035,
+ "step": 946
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019380605768788621,
+ "loss": 1.7586,
+ "step": 947
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.0001937929684088264,
+ "loss": 1.7334,
+ "step": 948
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019377986575693518,
+ "loss": 1.5749,
+ "step": 949
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00019376674973408075,
+ "loss": 1.874,
+ "step": 950
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019375362034213314,
+ "loss": 2.3055,
+ "step": 951
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019374047758296433,
+ "loss": 1.5801,
+ "step": 952
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001937273214584482,
+ "loss": 1.8788,
+ "step": 953
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019371415197046052,
+ "loss": 2.431,
+ "step": 954
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019370096912087897,
+ "loss": 1.4963,
+ "step": 955
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.0001936877729115831,
+ "loss": 1.514,
+ "step": 956
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019367456334445446,
+ "loss": 1.6099,
+ "step": 957
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 0.00019366134042137642,
+ "loss": 1.9367,
+ "step": 958
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019364810414423427,
+ "loss": 1.7384,
+ "step": 959
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019363485451491524,
+ "loss": 1.6166,
+ "step": 960
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019362159153530844,
+ "loss": 1.955,
+ "step": 961
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019360831520730482,
+ "loss": 1.4189,
+ "step": 962
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019359502553279736,
+ "loss": 1.4506,
+ "step": 963
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019358172251368087,
+ "loss": 1.7108,
+ "step": 964
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019356840615185203,
+ "loss": 1.6641,
+ "step": 965
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.00019355507644920952,
+ "loss": 1.7506,
+ "step": 966
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019354173340765382,
+ "loss": 2.0598,
+ "step": 967
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935283770290874,
+ "loss": 1.3494,
+ "step": 968
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019351500731541453,
+ "loss": 1.6571,
+ "step": 969
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001935016242685415,
+ "loss": 1.6403,
+ "step": 970
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019348822789037637,
+ "loss": 1.7555,
+ "step": 971
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.00019347481818282925,
+ "loss": 2.1451,
+ "step": 972
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.000193461395147812,
+ "loss": 1.4522,
+ "step": 973
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934479587872385,
+ "loss": 1.7147,
+ "step": 974
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 0.0001934345091030245,
+ "loss": 1.3909,
+ "step": 975
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019342104609708756,
+ "loss": 1.8104,
+ "step": 976
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019340756977134728,
+ "loss": 1.5221,
+ "step": 977
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.000193394080127725,
+ "loss": 1.9447,
+ "step": 978
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.0001933805771681442,
+ "loss": 1.5742,
+ "step": 979
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019336706089452996,
+ "loss": 1.5312,
+ "step": 980
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019335353130880948,
+ "loss": 1.4304,
+ "step": 981
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019333998841291177,
+ "loss": 1.8379,
+ "step": 982
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00019332643220876773,
+ "loss": 1.877,
+ "step": 983
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001933128626983102,
+ "loss": 1.9627,
+ "step": 984
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932992798834739,
+ "loss": 1.7857,
+ "step": 985
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019328568376619543,
+ "loss": 1.3189,
+ "step": 986
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019327207434841333,
+ "loss": 1.9588,
+ "step": 987
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019325845163206795,
+ "loss": 1.3132,
+ "step": 988
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019324481561910163,
+ "loss": 1.6304,
+ "step": 989
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.0001932311663114586,
+ "loss": 1.8322,
+ "step": 990
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00019321750371108486,
+ "loss": 1.4192,
+ "step": 991
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001932038278199285,
+ "loss": 1.3915,
+ "step": 992
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019319013863993933,
+ "loss": 1.8433,
+ "step": 993
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931764361730692,
+ "loss": 2.1459,
+ "step": 994
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001931627204212717,
+ "loss": 1.9799,
+ "step": 995
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019314899138650243,
+ "loss": 1.855,
+ "step": 996
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019313524907071887,
+ "loss": 1.4763,
+ "step": 997
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019312149347588037,
+ "loss": 2.0128,
+ "step": 998
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.00019310772460394814,
+ "loss": 1.6964,
+ "step": 999
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0001930939424568854,
+ "loss": 1.5864,
+ "step": 1000
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019308014703665712,
+ "loss": 1.8437,
+ "step": 1001
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019306633834523024,
+ "loss": 2.1677,
+ "step": 1002
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019305251638457356,
+ "loss": 1.8872,
+ "step": 1003
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930386811566578,
+ "loss": 1.7312,
+ "step": 1004
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001930248326634556,
+ "loss": 1.6772,
+ "step": 1005
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019301097090694143,
+ "loss": 1.9666,
+ "step": 1006
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019299709588909165,
+ "loss": 1.8946,
+ "step": 1007
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.00019298320761188453,
+ "loss": 2.1784,
+ "step": 1008
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.0001929693060773003,
+ "loss": 2.0249,
+ "step": 1009
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019295539128732093,
+ "loss": 1.717,
+ "step": 1010
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019294146324393046,
+ "loss": 1.8671,
+ "step": 1011
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019292752194911464,
+ "loss": 1.8388,
+ "step": 1012
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019291356740486123,
+ "loss": 1.9111,
+ "step": 1013
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019289959961315986,
+ "loss": 1.5287,
+ "step": 1014
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.000192885618576002,
+ "loss": 1.5669,
+ "step": 1015
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 0.00019287162429538105,
+ "loss": 1.9095,
+ "step": 1016
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019285761677329232,
+ "loss": 1.9133,
+ "step": 1017
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019284359601173294,
+ "loss": 2.1099,
+ "step": 1018
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.000192829562012702,
+ "loss": 1.6303,
+ "step": 1019
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019281551477820036,
+ "loss": 1.5907,
+ "step": 1020
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019280145431023097,
+ "loss": 1.4897,
+ "step": 1021
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019278738061079845,
+ "loss": 1.7414,
+ "step": 1022
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019277329368190942,
+ "loss": 1.816,
+ "step": 1023
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019275919352557241,
+ "loss": 1.5033,
+ "step": 1024
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00019274508014379777,
+ "loss": 1.7923,
+ "step": 1025
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019273095353859775,
+ "loss": 1.3094,
+ "step": 1026
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019271681371198652,
+ "loss": 1.7689,
+ "step": 1027
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.0001927026606659801,
+ "loss": 1.8019,
+ "step": 1028
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019268849440259639,
+ "loss": 1.8818,
+ "step": 1029
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019267431492385521,
+ "loss": 1.7442,
+ "step": 1030
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019266012223177824,
+ "loss": 2.045,
+ "step": 1031
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019264591632838903,
+ "loss": 1.7842,
+ "step": 1032
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00019263169721571308,
+ "loss": 1.5289,
+ "step": 1033
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019261746489577765,
+ "loss": 1.6013,
+ "step": 1034
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019260321937061202,
+ "loss": 1.7912,
+ "step": 1035
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925889606422473,
+ "loss": 1.7573,
+ "step": 1036
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.0001925746887127164,
+ "loss": 1.7368,
+ "step": 1037
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019256040358405424,
+ "loss": 1.7497,
+ "step": 1038
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019254610525829758,
+ "loss": 2.0042,
+ "step": 1039
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019253179373748504,
+ "loss": 2.0732,
+ "step": 1040
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 0.00019251746902365708,
+ "loss": 1.8878,
+ "step": 1041
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019250313111885618,
+ "loss": 1.9404,
+ "step": 1042
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019248878002512654,
+ "loss": 1.5535,
+ "step": 1043
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019247441574451432,
+ "loss": 1.9344,
+ "step": 1044
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001924600382790676,
+ "loss": 1.9696,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "eval_loss": 2.064669609069824,
+ "eval_runtime": 283.003,
+ "eval_samples_per_second": 0.728,
+ "eval_steps_per_second": 0.728,
+ "step": 1045
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019244564763083624,
+ "loss": 1.4577,
+ "step": 1046
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019243124380187204,
+ "loss": 2.1324,
+ "step": 1047
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019241682679422873,
+ "loss": 1.4713,
+ "step": 1048
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00019240239660996177,
+ "loss": 1.7455,
+ "step": 1049
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.0001923879532511287,
+ "loss": 1.5372,
+ "step": 1050
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019237349671978872,
+ "loss": 2.0984,
+ "step": 1051
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923590270180031,
+ "loss": 1.5023,
+ "step": 1052
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001923445441478348,
+ "loss": 2.0826,
+ "step": 1053
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019233004811134886,
+ "loss": 1.7448,
+ "step": 1054
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019231553891061208,
+ "loss": 2.0249,
+ "step": 1055
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019230101654769312,
+ "loss": 1.6144,
+ "step": 1056
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.0001922864810246626,
+ "loss": 1.9193,
+ "step": 1057
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 0.00019227193234359292,
+ "loss": 2.0057,
+ "step": 1058
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019225737050655842,
+ "loss": 1.9493,
+ "step": 1059
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019224279551563532,
+ "loss": 1.9545,
+ "step": 1060
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001922282073729017,
+ "loss": 1.8983,
+ "step": 1061
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019221360608043746,
+ "loss": 1.9414,
+ "step": 1062
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019219899164032447,
+ "loss": 1.8471,
+ "step": 1063
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921843640546464,
+ "loss": 1.7568,
+ "step": 1064
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00019216972332548887,
+ "loss": 2.0737,
+ "step": 1065
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.0001921550694549393,
+ "loss": 1.6109,
+ "step": 1066
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.000192140402445087,
+ "loss": 1.6684,
+ "step": 1067
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001921257222980232,
+ "loss": 1.5101,
+ "step": 1068
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019211102901584094,
+ "loss": 1.5262,
+ "step": 1069
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920963226006352,
+ "loss": 1.9757,
+ "step": 1070
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019208160305450272,
+ "loss": 2.038,
+ "step": 1071
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019206687037954224,
+ "loss": 1.4755,
+ "step": 1072
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019205212457785434,
+ "loss": 1.7406,
+ "step": 1073
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.00019203736565154137,
+ "loss": 1.9564,
+ "step": 1074
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 0.0001920225936027077,
+ "loss": 1.823,
+ "step": 1075
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001920078084334595,
+ "loss": 1.8275,
+ "step": 1076
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919930101459048,
+ "loss": 1.7106,
+ "step": 1077
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019197819874215347,
+ "loss": 1.5958,
+ "step": 1078
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019196337422431735,
+ "loss": 2.1478,
+ "step": 1079
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.0001919485365945101,
+ "loss": 1.7238,
+ "step": 1080
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019193368585484718,
+ "loss": 2.0758,
+ "step": 1081
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.000191918822007446,
+ "loss": 1.8403,
+ "step": 1082
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00019190394505442585,
+ "loss": 1.8286,
+ "step": 1083
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019188905499790789,
+ "loss": 1.6992,
+ "step": 1084
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019187415184001503,
+ "loss": 1.8512,
+ "step": 1085
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918592355828722,
+ "loss": 1.8236,
+ "step": 1086
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918443062286061,
+ "loss": 1.6173,
+ "step": 1087
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019182936377934535,
+ "loss": 1.8593,
+ "step": 1088
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001918144082372204,
+ "loss": 1.8184,
+ "step": 1089
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019179943960436358,
+ "loss": 1.9655,
+ "step": 1090
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.00019178445788290915,
+ "loss": 1.5858,
+ "step": 1091
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019176946307499312,
+ "loss": 1.8359,
+ "step": 1092
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917544551827534,
+ "loss": 1.4354,
+ "step": 1093
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019173943420832984,
+ "loss": 1.4312,
+ "step": 1094
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.0001917244001538641,
+ "loss": 2.0024,
+ "step": 1095
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019170935302149965,
+ "loss": 1.5994,
+ "step": 1096
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019169429281338195,
+ "loss": 2.05,
+ "step": 1097
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019167921953165825,
+ "loss": 1.8746,
+ "step": 1098
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019166413317847763,
+ "loss": 2.0071,
+ "step": 1099
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 0.00019164903375599112,
+ "loss": 2.0331,
+ "step": 1100
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019163392126635154,
+ "loss": 1.3587,
+ "step": 1101
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019161879571171362,
+ "loss": 1.6144,
+ "step": 1102
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019160365709423388,
+ "loss": 1.4845,
+ "step": 1103
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019158850541607083,
+ "loss": 1.4511,
+ "step": 1104
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019157334067938474,
+ "loss": 1.8015,
+ "step": 1105
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019155816288633776,
+ "loss": 1.5029,
+ "step": 1106
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019154297203909394,
+ "loss": 1.7102,
+ "step": 1107
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00019152776813981912,
+ "loss": 1.6661,
+ "step": 1108
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001915125511906811,
+ "loss": 1.5872,
+ "step": 1109
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019149732119384943,
+ "loss": 1.7868,
+ "step": 1110
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914820781514956,
+ "loss": 1.6365,
+ "step": 1111
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914668220657929,
+ "loss": 2.3434,
+ "step": 1112
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914515529389166,
+ "loss": 1.6458,
+ "step": 1113
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.0001914362707730437,
+ "loss": 1.7061,
+ "step": 1114
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019142097557035308,
+ "loss": 1.8606,
+ "step": 1115
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 0.00019140566733302552,
+ "loss": 1.9415,
+ "step": 1116
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019139034606324362,
+ "loss": 1.7411,
+ "step": 1117
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019137501176319193,
+ "loss": 1.9404,
+ "step": 1118
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913596644350567,
+ "loss": 1.802,
+ "step": 1119
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019134430408102615,
+ "loss": 1.2244,
+ "step": 1120
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019132893070329036,
+ "loss": 1.902,
+ "step": 1121
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001913135443040412,
+ "loss": 1.4578,
+ "step": 1122
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019129814488547247,
+ "loss": 1.6816,
+ "step": 1123
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.0001912827324497798,
+ "loss": 1.7293,
+ "step": 1124
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00019126730699916061,
+ "loss": 1.6344,
+ "step": 1125
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.0001912518685358143,
+ "loss": 1.6819,
+ "step": 1126
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019123641706194199,
+ "loss": 1.6761,
+ "step": 1127
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019122095257974677,
+ "loss": 1.9222,
+ "step": 1128
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019120547509143354,
+ "loss": 1.6117,
+ "step": 1129
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019118998459920902,
+ "loss": 1.688,
+ "step": 1130
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019117448110528184,
+ "loss": 1.8383,
+ "step": 1131
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019115896461186245,
+ "loss": 1.5225,
+ "step": 1132
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00019114343512116318,
+ "loss": 2.0376,
+ "step": 1133
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019112789263539813,
+ "loss": 1.5632,
+ "step": 1134
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019111233715678343,
+ "loss": 1.7049,
+ "step": 1135
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910967686875369,
+ "loss": 1.4992,
+ "step": 1136
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019108118722987826,
+ "loss": 1.7949,
+ "step": 1137
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019106559278602903,
+ "loss": 1.4688,
+ "step": 1138
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019104998535821274,
+ "loss": 1.4031,
+ "step": 1139
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.0001910343649486546,
+ "loss": 2.1757,
+ "step": 1140
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 0.00019101873155958179,
+ "loss": 1.622,
+ "step": 1141
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019100308519322322,
+ "loss": 1.9441,
+ "step": 1142
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.0001909874258518098,
+ "loss": 1.8065,
+ "step": 1143
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019097175353757417,
+ "loss": 1.8348,
+ "step": 1144
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019095606825275083,
+ "loss": 2.0519,
+ "step": 1145
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019094036999957624,
+ "loss": 1.9172,
+ "step": 1146
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019092465878028854,
+ "loss": 1.9961,
+ "step": 1147
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019090893459712787,
+ "loss": 2.1239,
+ "step": 1148
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019089319745233611,
+ "loss": 1.3481,
+ "step": 1149
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00019087744734815708,
+ "loss": 1.5035,
+ "step": 1150
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019086168428683638,
+ "loss": 1.818,
+ "step": 1151
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019084590827062145,
+ "loss": 2.0481,
+ "step": 1152
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019083011930176165,
+ "loss": 1.4444,
+ "step": 1153
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019081431738250814,
+ "loss": 1.6059,
+ "step": 1154
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907985025151139,
+ "loss": 2.0284,
+ "step": 1155
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.0001907826747018338,
+ "loss": 1.8603,
+ "step": 1156
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019076683394492455,
+ "loss": 1.7189,
+ "step": 1157
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 0.00019075098024664468,
+ "loss": 1.7497,
+ "step": 1158
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019073511360925458,
+ "loss": 1.7489,
+ "step": 1159
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001907192340350165,
+ "loss": 1.6059,
+ "step": 1160
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019070334152619453,
+ "loss": 1.4407,
+ "step": 1161
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019068743608505455,
+ "loss": 1.7025,
+ "step": 1162
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019067151771386438,
+ "loss": 1.7921,
+ "step": 1163
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906555864148936,
+ "loss": 1.6147,
+ "step": 1164
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.0001906396421904137,
+ "loss": 1.6192,
+ "step": 1165
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00019062368504269795,
+ "loss": 1.4341,
+ "step": 1166
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019060771497402147,
+ "loss": 1.3054,
+ "step": 1167
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001905917319866613,
+ "loss": 2.041,
+ "step": 1168
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019057573608289623,
+ "loss": 2.004,
+ "step": 1169
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019055972726500695,
+ "loss": 1.4002,
+ "step": 1170
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019054370553527595,
+ "loss": 1.5554,
+ "step": 1171
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019052767089598754,
+ "loss": 1.9783,
+ "step": 1172
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.000190511623349428,
+ "loss": 1.7443,
+ "step": 1173
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.00019049556289788528,
+ "loss": 1.6089,
+ "step": 1174
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 0.0001904794895436493,
+ "loss": 1.8784,
+ "step": 1175
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904634032890117,
+ "loss": 2.0985,
+ "step": 1176
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904473041362661,
+ "loss": 1.811,
+ "step": 1177
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019043119208770793,
+ "loss": 1.407,
+ "step": 1178
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.0001904150671456343,
+ "loss": 1.7269,
+ "step": 1179
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019039892931234435,
+ "loss": 1.8374,
+ "step": 1180
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019038277859013896,
+ "loss": 1.583,
+ "step": 1181
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019036661498132086,
+ "loss": 1.6407,
+ "step": 1182
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00019035043848819464,
+ "loss": 2.0828,
+ "step": 1183
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019033424911306672,
+ "loss": 1.7067,
+ "step": 1184
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019031804685824534,
+ "loss": 1.55,
+ "step": 1185
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001903018317260406,
+ "loss": 1.7573,
+ "step": 1186
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019028560371876446,
+ "loss": 1.5666,
+ "step": 1187
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.0001902693628387306,
+ "loss": 1.5192,
+ "step": 1188
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019025310908825466,
+ "loss": 2.0093,
+ "step": 1189
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019023684246965406,
+ "loss": 1.8414,
+ "step": 1190
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00019022056298524808,
+ "loss": 1.3696,
+ "step": 1191
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019020427063735782,
+ "loss": 1.6336,
+ "step": 1192
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019018796542830617,
+ "loss": 1.8528,
+ "step": 1193
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019017164736041795,
+ "loss": 2.0523,
+ "step": 1194
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019015531643601973,
+ "loss": 1.7526,
+ "step": 1195
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019013897265743998,
+ "loss": 1.8391,
+ "step": 1196
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019012261602700892,
+ "loss": 1.4257,
+ "step": 1197
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019010624654705867,
+ "loss": 2.0911,
+ "step": 1198
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.0001900898642199232,
+ "loss": 1.7578,
+ "step": 1199
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 0.00019007346904793818,
+ "loss": 1.9003,
+ "step": 1200
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900570610334413,
+ "loss": 1.3918,
+ "step": 1201
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.0001900406401787719,
+ "loss": 2.0365,
+ "step": 1202
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019002420648627131,
+ "loss": 1.5184,
+ "step": 1203
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00019000775995828254,
+ "loss": 1.6412,
+ "step": 1204
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018999130059715058,
+ "loss": 1.5031,
+ "step": 1205
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018997482840522217,
+ "loss": 1.4421,
+ "step": 1206
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.00018995834338484584,
+ "loss": 1.9431,
+ "step": 1207
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 0.000189941845538372,
+ "loss": 1.8141,
+ "step": 1208
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.0001899253348681529,
+ "loss": 1.7289,
+ "step": 1209
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018990881137654258,
+ "loss": 1.7217,
+ "step": 1210
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.000189892275065897,
+ "loss": 2.3727,
+ "step": 1211
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018987572593857381,
+ "loss": 1.4833,
+ "step": 1212
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018985916399693256,
+ "loss": 2.13,
+ "step": 1213
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018984258924333464,
+ "loss": 1.875,
+ "step": 1214
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018982600168014323,
+ "loss": 1.783,
+ "step": 1215
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 0.00018980940130972337,
+ "loss": 1.6815,
+ "step": 1216
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897927881344419,
+ "loss": 2.049,
+ "step": 1217
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018977616215666752,
+ "loss": 1.918,
+ "step": 1218
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897595233787707,
+ "loss": 1.5824,
+ "step": 1219
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018974287180312377,
+ "loss": 1.7473,
+ "step": 1220
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018972620743210093,
+ "loss": 1.6915,
+ "step": 1221
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.0001897095302680781,
+ "loss": 1.7633,
+ "step": 1222
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018969284031343308,
+ "loss": 1.6921,
+ "step": 1223
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018967613757054554,
+ "loss": 1.5433,
+ "step": 1224
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 0.00018965942204179686,
+ "loss": 1.9389,
+ "step": 1225
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018964269372957038,
+ "loss": 1.5625,
+ "step": 1226
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018962595263625115,
+ "loss": 1.4835,
+ "step": 1227
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018960919876422611,
+ "loss": 1.8479,
+ "step": 1228
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018959243211588397,
+ "loss": 1.7861,
+ "step": 1229
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018957565269361531,
+ "loss": 1.867,
+ "step": 1230
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.00018955886049981245,
+ "loss": 1.9383,
+ "step": 1231
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.0001895420555368697,
+ "loss": 1.755,
+ "step": 1232
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 0.000189525237807183,
+ "loss": 1.5166,
+ "step": 1233
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018950840731315024,
+ "loss": 1.8629,
+ "step": 1234
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.000189491564057171,
+ "loss": 1.6845,
+ "step": 1235
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018947470804164685,
+ "loss": 1.4748,
+ "step": 1236
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018945783926898105,
+ "loss": 1.8907,
+ "step": 1237
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018944095774157873,
+ "loss": 1.5758,
+ "step": 1238
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018942406346184683,
+ "loss": 1.6367,
+ "step": 1239
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018940715643219407,
+ "loss": 1.7285,
+ "step": 1240
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 0.00018939023665503108,
+ "loss": 1.5714,
+ "step": 1241
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001893733041327702,
+ "loss": 1.9308,
+ "step": 1242
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018935635886782568,
+ "loss": 1.9153,
+ "step": 1243
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018933940086261351,
+ "loss": 1.8009,
+ "step": 1244
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018932243011955154,
+ "loss": 1.7392,
+ "step": 1245
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018930544664105944,
+ "loss": 1.821,
+ "step": 1246
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.0001892884504295587,
+ "loss": 1.475,
+ "step": 1247
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018927144148747255,
+ "loss": 1.8937,
+ "step": 1248
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018925441981722618,
+ "loss": 1.6958,
+ "step": 1249
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00018923738542124644,
+ "loss": 1.6836,
+ "step": 1250
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018922033830196208,
+ "loss": 2.0213,
+ "step": 1251
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018920327846180365,
+ "loss": 1.9572,
+ "step": 1252
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018918620590320352,
+ "loss": 1.9449,
+ "step": 1253
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018916912062859583,
+ "loss": 1.7297,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "eval_loss": 2.0551259517669678,
+ "eval_runtime": 283.8338,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1254
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018915202264041664,
+ "loss": 1.8158,
+ "step": 1255
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891349119411037,
+ "loss": 1.921,
+ "step": 1256
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.00018911778853309658,
+ "loss": 1.5726,
+ "step": 1257
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 0.0001891006524188368,
+ "loss": 1.6641,
+ "step": 1258
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018908350360076752,
+ "loss": 1.5841,
+ "step": 1259
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018906634208133385,
+ "loss": 1.8567,
+ "step": 1260
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018904916786298257,
+ "loss": 1.5584,
+ "step": 1261
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018903198094816242,
+ "loss": 1.6615,
+ "step": 1262
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018901478133932385,
+ "loss": 1.7477,
+ "step": 1263
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018899756903891914,
+ "loss": 1.3796,
+ "step": 1264
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018898034404940238,
+ "loss": 1.7991,
+ "step": 1265
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 0.00018896310637322953,
+ "loss": 1.4944,
+ "step": 1266
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018894585601285827,
+ "loss": 1.5719,
+ "step": 1267
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018892859297074812,
+ "loss": 1.5495,
+ "step": 1268
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018891131724936043,
+ "loss": 1.7611,
+ "step": 1269
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018889402885115833,
+ "loss": 1.5991,
+ "step": 1270
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.00018887672777860676,
+ "loss": 1.8849,
+ "step": 1271
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888594140341725,
+ "loss": 1.6136,
+ "step": 1272
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888420876203241,
+ "loss": 1.8288,
+ "step": 1273
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 0.0001888247485395319,
+ "loss": 1.6625,
+ "step": 1274
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018880739679426816,
+ "loss": 1.49,
+ "step": 1275
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018879003238700675,
+ "loss": 1.874,
+ "step": 1276
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018877265532022352,
+ "loss": 1.751,
+ "step": 1277
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018875526559639604,
+ "loss": 1.9882,
+ "step": 1278
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018873786321800374,
+ "loss": 1.5214,
+ "step": 1279
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001887204481875278,
+ "loss": 1.741,
+ "step": 1280
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018870302050745118,
+ "loss": 1.7798,
+ "step": 1281
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.00018868558018025878,
+ "loss": 1.9258,
+ "step": 1282
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 0.0001886681272084371,
+ "loss": 1.9096,
+ "step": 1283
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018865066159447466,
+ "loss": 1.6729,
+ "step": 1284
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018863318334086157,
+ "loss": 1.6239,
+ "step": 1285
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018861569245008994,
+ "loss": 1.9857,
+ "step": 1286
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018859818892465354,
+ "loss": 1.9905,
+ "step": 1287
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188580672767048,
+ "loss": 2.0073,
+ "step": 1288
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018856314397977075,
+ "loss": 1.7109,
+ "step": 1289
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.000188545602565321,
+ "loss": 1.3727,
+ "step": 1290
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 0.00018852804852619975,
+ "loss": 1.7045,
+ "step": 1291
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018851048186490992,
+ "loss": 1.9042,
+ "step": 1292
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018849290258395602,
+ "loss": 1.7174,
+ "step": 1293
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018847531068584452,
+ "loss": 1.6502,
+ "step": 1294
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018845770617308366,
+ "loss": 1.8582,
+ "step": 1295
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001884400890481834,
+ "loss": 1.4846,
+ "step": 1296
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018842245931365562,
+ "loss": 1.5428,
+ "step": 1297
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.00018840481697201392,
+ "loss": 1.7266,
+ "step": 1298
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 0.0001883871620257737,
+ "loss": 1.9324,
+ "step": 1299
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018836949447745215,
+ "loss": 1.577,
+ "step": 1300
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001883518143295683,
+ "loss": 1.6388,
+ "step": 1301
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018833412158464298,
+ "loss": 1.9201,
+ "step": 1302
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018831641624519877,
+ "loss": 1.6478,
+ "step": 1303
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018829869831376005,
+ "loss": 1.6826,
+ "step": 1304
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018828096779285303,
+ "loss": 1.8513,
+ "step": 1305
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018826322468500566,
+ "loss": 1.571,
+ "step": 1306
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.00018824546899274777,
+ "loss": 1.1602,
+ "step": 1307
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 0.0001882277007186109,
+ "loss": 1.9998,
+ "step": 1308
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001882099198651284,
+ "loss": 1.7034,
+ "step": 1309
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.0001881921264348355,
+ "loss": 1.4031,
+ "step": 1310
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018817432043026911,
+ "loss": 1.8413,
+ "step": 1311
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018815650185396797,
+ "loss": 1.6606,
+ "step": 1312
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018813867070847264,
+ "loss": 1.5792,
+ "step": 1313
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018812082699632546,
+ "loss": 1.4525,
+ "step": 1314
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018810297072007054,
+ "loss": 1.4906,
+ "step": 1315
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 0.00018808510188225377,
+ "loss": 1.6284,
+ "step": 1316
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880672204854229,
+ "loss": 1.7281,
+ "step": 1317
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001880493265321274,
+ "loss": 1.5345,
+ "step": 1318
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018803142002491856,
+ "loss": 2.0933,
+ "step": 1319
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018801350096634946,
+ "loss": 1.9372,
+ "step": 1320
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.000187995569358975,
+ "loss": 1.7151,
+ "step": 1321
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018797762520535177,
+ "loss": 1.4823,
+ "step": 1322
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.0001879596685080383,
+ "loss": 2.0495,
+ "step": 1323
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 0.00018794169926959474,
+ "loss": 2.2966,
+ "step": 1324
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018792371749258314,
+ "loss": 1.7868,
+ "step": 1325
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018790572317956735,
+ "loss": 1.9403,
+ "step": 1326
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018788771633311292,
+ "loss": 1.6687,
+ "step": 1327
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018786969695578723,
+ "loss": 1.8422,
+ "step": 1328
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018785166505015948,
+ "loss": 1.5916,
+ "step": 1329
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018783362061880062,
+ "loss": 1.9119,
+ "step": 1330
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018781556366428336,
+ "loss": 1.4903,
+ "step": 1331
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018779749418918227,
+ "loss": 1.9497,
+ "step": 1332
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 0.00018777941219607364,
+ "loss": 1.9462,
+ "step": 1333
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018776131768753556,
+ "loss": 2.0474,
+ "step": 1334
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018774321066614795,
+ "loss": 1.4474,
+ "step": 1335
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018772509113449245,
+ "loss": 1.8315,
+ "step": 1336
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018770695909515247,
+ "loss": 1.7684,
+ "step": 1337
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018768881455071332,
+ "loss": 1.2675,
+ "step": 1338
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.000187670657503762,
+ "loss": 1.8226,
+ "step": 1339
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.00018765248795688726,
+ "loss": 2.2112,
+ "step": 1340
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 0.0001876343059126797,
+ "loss": 1.3627,
+ "step": 1341
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018761611137373173,
+ "loss": 2.1488,
+ "step": 1342
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018759790434263744,
+ "loss": 1.9842,
+ "step": 1343
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018757968482199276,
+ "loss": 1.9775,
+ "step": 1344
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018756145281439545,
+ "loss": 1.6835,
+ "step": 1345
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875432083224449,
+ "loss": 1.5272,
+ "step": 1346
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.0001875249513487425,
+ "loss": 1.7539,
+ "step": 1347
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018750668189589117,
+ "loss": 1.874,
+ "step": 1348
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 0.00018748839996649583,
+ "loss": 1.5858,
+ "step": 1349
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018747010556316305,
+ "loss": 1.9298,
+ "step": 1350
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001874517986885012,
+ "loss": 1.5079,
+ "step": 1351
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018743347934512046,
+ "loss": 1.884,
+ "step": 1352
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018741514753563277,
+ "loss": 1.7978,
+ "step": 1353
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873968032626518,
+ "loss": 1.7735,
+ "step": 1354
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018737844652879312,
+ "loss": 1.7227,
+ "step": 1355
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018736007733667393,
+ "loss": 1.8458,
+ "step": 1356
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.00018734169568891334,
+ "loss": 1.3268,
+ "step": 1357
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 0.0001873233015881321,
+ "loss": 1.3782,
+ "step": 1358
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018730489503695287,
+ "loss": 1.9614,
+ "step": 1359
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018728647603800003,
+ "loss": 1.7755,
+ "step": 1360
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018726804459389963,
+ "loss": 1.7961,
+ "step": 1361
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018724960070727972,
+ "loss": 1.7158,
+ "step": 1362
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872311443807699,
+ "loss": 1.6303,
+ "step": 1363
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001872126756170017,
+ "loss": 1.8734,
+ "step": 1364
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00018719419441860834,
+ "loss": 1.5143,
+ "step": 1365
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.0001871757007882248,
+ "loss": 1.498,
+ "step": 1366
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001871571947284879,
+ "loss": 1.0886,
+ "step": 1367
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018713867624203621,
+ "loss": 1.6633,
+ "step": 1368
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018712014533151008,
+ "loss": 1.8895,
+ "step": 1369
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018710160199955156,
+ "loss": 1.4178,
+ "step": 1370
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018708304624880456,
+ "loss": 1.6814,
+ "step": 1371
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.0001870644780819147,
+ "loss": 1.8671,
+ "step": 1372
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018704589750152944,
+ "loss": 1.4786,
+ "step": 1373
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 0.00018702730451029796,
+ "loss": 1.8622,
+ "step": 1374
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018700869911087115,
+ "loss": 1.8891,
+ "step": 1375
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869900813059018,
+ "loss": 2.0493,
+ "step": 1376
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018697145109804436,
+ "loss": 1.7238,
+ "step": 1377
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.00018695280848995513,
+ "loss": 1.7826,
+ "step": 1378
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869341534842921,
+ "loss": 1.8557,
+ "step": 1379
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001869154860837151,
+ "loss": 1.7492,
+ "step": 1380
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868968062908857,
+ "loss": 1.7441,
+ "step": 1381
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868781141084672,
+ "loss": 1.8322,
+ "step": 1382
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 0.0001868594095391247,
+ "loss": 1.8177,
+ "step": 1383
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018684069258552508,
+ "loss": 2.0001,
+ "step": 1384
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018682196325033696,
+ "loss": 1.5046,
+ "step": 1385
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018680322153623075,
+ "loss": 1.6789,
+ "step": 1386
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867844674458786,
+ "loss": 1.6951,
+ "step": 1387
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018676570098195443,
+ "loss": 2.0334,
+ "step": 1388
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018674692214713388,
+ "loss": 1.7833,
+ "step": 1389
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.0001867281309440945,
+ "loss": 1.82,
+ "step": 1390
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 0.00018670932737551547,
+ "loss": 1.8155,
+ "step": 1391
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018669051144407775,
+ "loss": 1.7912,
+ "step": 1392
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018667168315246406,
+ "loss": 1.5816,
+ "step": 1393
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018665284250335895,
+ "loss": 1.7521,
+ "step": 1394
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018663398949944865,
+ "loss": 1.4287,
+ "step": 1395
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018661512414342127,
+ "loss": 1.6026,
+ "step": 1396
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018659624643796647,
+ "loss": 1.6953,
+ "step": 1397
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018657735638577587,
+ "loss": 1.8515,
+ "step": 1398
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 0.00018655845398954276,
+ "loss": 2.0384,
+ "step": 1399
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018653953925196225,
+ "loss": 1.5458,
+ "step": 1400
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018652061217573114,
+ "loss": 1.7166,
+ "step": 1401
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.000186501672763548,
+ "loss": 1.5653,
+ "step": 1402
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018648272101811318,
+ "loss": 2.0928,
+ "step": 1403
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018646375694212884,
+ "loss": 1.605,
+ "step": 1404
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018644478053829878,
+ "loss": 1.4734,
+ "step": 1405
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018642579180932865,
+ "loss": 2.0578,
+ "step": 1406
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018640679075792582,
+ "loss": 1.9823,
+ "step": 1407
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 0.00018638777738679943,
+ "loss": 2.0551,
+ "step": 1408
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018636875169866036,
+ "loss": 1.6315,
+ "step": 1409
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863497136962213,
+ "loss": 1.8965,
+ "step": 1410
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001863306633821966,
+ "loss": 1.3584,
+ "step": 1411
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018631160075930245,
+ "loss": 1.9673,
+ "step": 1412
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018629252583025676,
+ "loss": 1.5277,
+ "step": 1413
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.0001862734385977792,
+ "loss": 1.6788,
+ "step": 1414
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018625433906459116,
+ "loss": 1.432,
+ "step": 1415
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 0.00018623522723341588,
+ "loss": 1.8102,
+ "step": 1416
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018621610310697823,
+ "loss": 1.6713,
+ "step": 1417
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018619696668800492,
+ "loss": 1.6989,
+ "step": 1418
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001861778179792244,
+ "loss": 1.7645,
+ "step": 1419
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018615865698336684,
+ "loss": 1.594,
+ "step": 1420
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018613948370316415,
+ "loss": 1.8751,
+ "step": 1421
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018612029814135014,
+ "loss": 1.64,
+ "step": 1422
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.00018610110030066007,
+ "loss": 1.5066,
+ "step": 1423
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 0.0001860818901838313,
+ "loss": 1.9817,
+ "step": 1424
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018606266779360266,
+ "loss": 2.056,
+ "step": 1425
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860434331327149,
+ "loss": 1.6997,
+ "step": 1426
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018602418620391044,
+ "loss": 1.5573,
+ "step": 1427
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001860049270099335,
+ "loss": 1.8427,
+ "step": 1428
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018598565555353,
+ "loss": 2.012,
+ "step": 1429
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018596637183744763,
+ "loss": 1.7976,
+ "step": 1430
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018594707586443585,
+ "loss": 1.4,
+ "step": 1431
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.0001859277676372458,
+ "loss": 1.8717,
+ "step": 1432
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 0.00018590844715863045,
+ "loss": 1.4311,
+ "step": 1433
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018588911443134448,
+ "loss": 1.5903,
+ "step": 1434
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018586976945814425,
+ "loss": 2.0898,
+ "step": 1435
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018585041224178803,
+ "loss": 1.5302,
+ "step": 1436
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018583104278503568,
+ "loss": 1.9582,
+ "step": 1437
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018581166109064886,
+ "loss": 1.5264,
+ "step": 1438
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018579226716139096,
+ "loss": 1.6551,
+ "step": 1439
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018577286100002723,
+ "loss": 1.7774,
+ "step": 1440
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 0.00018575344260932444,
+ "loss": 1.8316,
+ "step": 1441
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001857340119920513,
+ "loss": 1.3916,
+ "step": 1442
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018571456915097818,
+ "loss": 1.6728,
+ "step": 1443
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856951140888772,
+ "loss": 1.7247,
+ "step": 1444
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018567564680852224,
+ "loss": 1.4539,
+ "step": 1445
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018565616731268888,
+ "loss": 1.613,
+ "step": 1446
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.0001856366756041545,
+ "loss": 1.757,
+ "step": 1447
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018561717168569816,
+ "loss": 1.6903,
+ "step": 1448
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 0.00018559765556010072,
+ "loss": 1.7322,
+ "step": 1449
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018557812723014476,
+ "loss": 1.5627,
+ "step": 1450
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018555858669861458,
+ "loss": 1.8751,
+ "step": 1451
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018553903396829625,
+ "loss": 1.2721,
+ "step": 1452
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018551946904197752,
+ "loss": 1.8167,
+ "step": 1453
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018549989192244797,
+ "loss": 1.6602,
+ "step": 1454
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018548030261249885,
+ "loss": 1.9053,
+ "step": 1455
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018546070111492315,
+ "loss": 1.7721,
+ "step": 1456
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018544108743251566,
+ "loss": 2.1421,
+ "step": 1457
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 0.00018542146156807284,
+ "loss": 1.5076,
+ "step": 1458
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018540182352439288,
+ "loss": 1.9039,
+ "step": 1459
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018538217330427582,
+ "loss": 1.9777,
+ "step": 1460
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018536251091052323,
+ "loss": 1.5702,
+ "step": 1461
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018534283634593862,
+ "loss": 1.851,
+ "step": 1462
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018532314961332717,
+ "loss": 1.5337,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "eval_loss": 2.068387508392334,
+ "eval_runtime": 283.4638,
+ "eval_samples_per_second": 0.727,
+ "eval_steps_per_second": 0.727,
+ "step": 1463
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018530345071549574,
+ "loss": 1.7553,
+ "step": 1464
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018528373965525296,
+ "loss": 1.4175,
+ "step": 1465
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 0.00018526401643540922,
+ "loss": 1.7216,
+ "step": 1466
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018524428105877664,
+ "loss": 1.6415,
+ "step": 1467
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018522453352816896,
+ "loss": 1.7284,
+ "step": 1468
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018520477384640187,
+ "loss": 1.8314,
+ "step": 1469
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018518500201629258,
+ "loss": 1.8341,
+ "step": 1470
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018516521804066015,
+ "loss": 1.4129,
+ "step": 1471
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018514542192232537,
+ "loss": 1.4671,
+ "step": 1472
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018512561366411067,
+ "loss": 1.6665,
+ "step": 1473
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 0.00018510579326884034,
+ "loss": 1.5722,
+ "step": 1474
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850859607393403,
+ "loss": 1.9348,
+ "step": 1475
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850661160784383,
+ "loss": 1.5404,
+ "step": 1476
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018504625928896363,
+ "loss": 1.4769,
+ "step": 1477
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018502639037374757,
+ "loss": 1.4149,
+ "step": 1478
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001850065093356229,
+ "loss": 1.958,
+ "step": 1479
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018498661617742426,
+ "loss": 1.8319,
+ "step": 1480
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018496671090198797,
+ "loss": 1.5948,
+ "step": 1481
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.0001849467935121521,
+ "loss": 1.8469,
+ "step": 1482
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 0.00018492686401075644,
+ "loss": 1.6798,
+ "step": 1483
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001849069224006425,
+ "loss": 1.8197,
+ "step": 1484
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848869686846535,
+ "loss": 1.6613,
+ "step": 1485
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001848670028656344,
+ "loss": 1.7322,
+ "step": 1486
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018484702494643188,
+ "loss": 2.0493,
+ "step": 1487
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018482703492989444,
+ "loss": 1.7182,
+ "step": 1488
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018480703281887215,
+ "loss": 1.689,
+ "step": 1489
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.00018478701861621686,
+ "loss": 1.9477,
+ "step": 1490
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0001847669923247822,
+ "loss": 1.8171,
+ "step": 1491
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018474695394742345,
+ "loss": 1.7337,
+ "step": 1492
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847269034869977,
+ "loss": 1.6983,
+ "step": 1493
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001847068409463636,
+ "loss": 1.6445,
+ "step": 1494
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846867663283818,
+ "loss": 1.9965,
+ "step": 1495
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846666796359143,
+ "loss": 1.6775,
+ "step": 1496
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.0001846465808718252,
+ "loss": 1.8117,
+ "step": 1497
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018462647003898006,
+ "loss": 1.8803,
+ "step": 1498
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 0.00018460634714024624,
+ "loss": 1.3045,
+ "step": 1499
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018458621217849286,
+ "loss": 1.7768,
+ "step": 1500
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018456606515659073,
+ "loss": 2.0641,
+ "step": 1501
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001845459060774123,
+ "loss": 1.3804,
+ "step": 1502
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018452573494383192,
+ "loss": 1.6271,
+ "step": 1503
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018450555175872547,
+ "loss": 1.8525,
+ "step": 1504
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018448535652497073,
+ "loss": 1.5303,
+ "step": 1505
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.000184465149245447,
+ "loss": 2.0368,
+ "step": 1506
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.00018444492992303544,
+ "loss": 1.9951,
+ "step": 1507
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 0.0001844246985606189,
+ "loss": 1.8715,
+ "step": 1508
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018440445516108186,
+ "loss": 1.7373,
+ "step": 1509
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018438419972731067,
+ "loss": 1.7667,
+ "step": 1510
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018436393226219327,
+ "loss": 1.5134,
+ "step": 1511
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018434365276861938,
+ "loss": 1.3891,
+ "step": 1512
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001843233612494804,
+ "loss": 1.7066,
+ "step": 1513
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.00018430305770766948,
+ "loss": 1.6366,
+ "step": 1514
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842827421460814,
+ "loss": 1.7838,
+ "step": 1515
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 0.0001842624145676128,
+ "loss": 1.7884,
+ "step": 1516
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001842420749751619,
+ "loss": 1.8428,
+ "step": 1517
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018422172337162867,
+ "loss": 1.4987,
+ "step": 1518
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018420135975991483,
+ "loss": 1.7576,
+ "step": 1519
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001841809841429238,
+ "loss": 1.8522,
+ "step": 1520
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018416059652356066,
+ "loss": 1.9308,
+ "step": 1521
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018414019690473227,
+ "loss": 1.4658,
+ "step": 1522
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.00018411978528934717,
+ "loss": 1.7072,
+ "step": 1523
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 0.0001840993616803156,
+ "loss": 1.736,
+ "step": 1524
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840789260805495,
+ "loss": 1.7712,
+ "step": 1525
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001840584784929626,
+ "loss": 1.2231,
+ "step": 1526
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018403801892047023,
+ "loss": 1.8421,
+ "step": 1527
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018401754736598947,
+ "loss": 1.2689,
+ "step": 1528
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018399706383243918,
+ "loss": 1.8062,
+ "step": 1529
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839765683227398,
+ "loss": 1.6846,
+ "step": 1530
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.0001839560608398136,
+ "loss": 1.8201,
+ "step": 1531
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018393554138658441,
+ "loss": 1.6958,
+ "step": 1532
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 0.00018391500996597796,
+ "loss": 1.8487,
+ "step": 1533
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.0001838944665809215,
+ "loss": 1.9788,
+ "step": 1534
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018387391123434412,
+ "loss": 1.6002,
+ "step": 1535
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018385334392917658,
+ "loss": 1.3859,
+ "step": 1536
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018383276466835127,
+ "loss": 2.0743,
+ "step": 1537
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018381217345480235,
+ "loss": 1.8357,
+ "step": 1538
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018379157029146573,
+ "loss": 1.7002,
+ "step": 1539
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018377095518127897,
+ "loss": 1.3058,
+ "step": 1540
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 0.00018375032812718124,
+ "loss": 1.8745,
+ "step": 1541
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018372968913211364,
+ "loss": 1.7847,
+ "step": 1542
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018370903819901874,
+ "loss": 1.8156,
+ "step": 1543
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018368837533084095,
+ "loss": 2.0152,
+ "step": 1544
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018366770053052634,
+ "loss": 1.5656,
+ "step": 1545
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018364701380102266,
+ "loss": 1.5753,
+ "step": 1546
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018362631514527947,
+ "loss": 1.3938,
+ "step": 1547
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018360560456624788,
+ "loss": 1.9599,
+ "step": 1548
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 0.00018358488206688075,
+ "loss": 1.8641,
+ "step": 1549
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018356414765013267,
+ "loss": 1.8428,
+ "step": 1550
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018354340131895998,
+ "loss": 1.6016,
+ "step": 1551
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018352264307632056,
+ "loss": 1.5768,
+ "step": 1552
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018350187292517415,
+ "loss": 1.5369,
+ "step": 1553
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.0001834810908684821,
+ "loss": 1.9717,
+ "step": 1554
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018346029690920746,
+ "loss": 1.943,
+ "step": 1555
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018343949105031505,
+ "loss": 1.8166,
+ "step": 1556
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018341867329477125,
+ "loss": 1.7149,
+ "step": 1557
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 0.00018339784364554426,
+ "loss": 1.4657,
+ "step": 1558
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018337700210560397,
+ "loss": 1.8693,
+ "step": 1559
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018335614867792183,
+ "loss": 1.7656,
+ "step": 1560
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001833352833654712,
+ "loss": 1.5123,
+ "step": 1561
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018331440617122696,
+ "loss": 1.7884,
+ "step": 1562
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832935170981657,
+ "loss": 1.7309,
+ "step": 1563
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018327261614926583,
+ "loss": 1.9628,
+ "step": 1564
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.00018325170332750732,
+ "loss": 1.6409,
+ "step": 1565
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 0.0001832307786358719,
+ "loss": 1.6093,
+ "step": 1566
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018320984207734298,
+ "loss": 1.6111,
+ "step": 1567
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018318889365490565,
+ "loss": 2.0085,
+ "step": 1568
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018316793337154664,
+ "loss": 2.079,
+ "step": 1569
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018314696123025454,
+ "loss": 1.5466,
+ "step": 1570
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018312597723401942,
+ "loss": 2.0825,
+ "step": 1571
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.0001831049813858332,
+ "loss": 1.9748,
+ "step": 1572
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018308397368868945,
+ "loss": 1.6529,
+ "step": 1573
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 0.00018306295414558335,
+ "loss": 1.7119,
+ "step": 1574
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018304192275951184,
+ "loss": 1.8812,
+ "step": 1575
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018302087953347352,
+ "loss": 1.8676,
+ "step": 1576
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018299982447046877,
+ "loss": 1.879,
+ "step": 1577
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018297875757349952,
+ "loss": 1.6282,
+ "step": 1578
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018295767884556947,
+ "loss": 1.735,
+ "step": 1579
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018293658828968397,
+ "loss": 1.5796,
+ "step": 1580
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018291548590885007,
+ "loss": 1.8258,
+ "step": 1581
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018289437170607658,
+ "loss": 1.7531,
+ "step": 1582
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 0.00018287324568437381,
+ "loss": 1.6265,
+ "step": 1583
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018285210784675394,
+ "loss": 1.7997,
+ "step": 1584
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018283095819623078,
+ "loss": 1.955,
+ "step": 1585
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018280979673581977,
+ "loss": 1.6542,
+ "step": 1586
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018278862346853808,
+ "loss": 1.7634,
+ "step": 1587
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018276743839740458,
+ "loss": 2.0077,
+ "step": 1588
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018274624152543977,
+ "loss": 2.0254,
+ "step": 1589
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018272503285566587,
+ "loss": 1.4464,
+ "step": 1590
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 0.00018270381239110677,
+ "loss": 1.8643,
+ "step": 1591
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018268258013478804,
+ "loss": 1.3278,
+ "step": 1592
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018266133608973696,
+ "loss": 1.744,
+ "step": 1593
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018264008025898248,
+ "loss": 1.5079,
+ "step": 1594
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018261881264555516,
+ "loss": 1.9655,
+ "step": 1595
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.0001825975332524873,
+ "loss": 2.0557,
+ "step": 1596
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.000182576242082813,
+ "loss": 1.7174,
+ "step": 1597
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018255493913956774,
+ "loss": 1.449,
+ "step": 1598
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 0.00018253362442578896,
+ "loss": 1.9058,
+ "step": 1599
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018251229794451567,
+ "loss": 1.3482,
+ "step": 1600
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018249095969878853,
+ "loss": 1.7906,
+ "step": 1601
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018246960969164994,
+ "loss": 1.6177,
+ "step": 1602
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018244824792614393,
+ "loss": 1.5786,
+ "step": 1603
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018242687440531618,
+ "loss": 1.6451,
+ "step": 1604
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018240548913221416,
+ "loss": 1.3695,
+ "step": 1605
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.0001823840921098869,
+ "loss": 1.6648,
+ "step": 1606
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018236268334138515,
+ "loss": 2.1548,
+ "step": 1607
+ },
+ {
+ "epoch": 1.91,
+ "learning_rate": 0.00018234126282976133,
+ "loss": 1.6153,
+ "step": 1608
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001823198305780696,
+ "loss": 1.741,
+ "step": 1609
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018229838658936564,
+ "loss": 1.7827,
+ "step": 1610
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018227693086670697,
+ "loss": 1.7343,
+ "step": 1611
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018225546341315261,
+ "loss": 1.8149,
+ "step": 1612
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001822339842317635,
+ "loss": 1.5497,
+ "step": 1613
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018221249332560198,
+ "loss": 1.7659,
+ "step": 1614
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0001821909906977322,
+ "loss": 1.8992,
+ "step": 1615
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.00018216947635122,
+ "loss": 1.8682,
+ "step": 1616
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018214795028913288,
+ "loss": 1.9774,
+ "step": 1617
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001821264125145399,
+ "loss": 1.9441,
+ "step": 1618
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018210486303051195,
+ "loss": 2.0314,
+ "step": 1619
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.0001820833018401215,
+ "loss": 1.8234,
+ "step": 1620
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018206172894644272,
+ "loss": 1.9478,
+ "step": 1621
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018204014435255135,
+ "loss": 1.7894,
+ "step": 1622
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.000182018548061525,
+ "loss": 1.5469,
+ "step": 1623
+ },
+ {
+ "epoch": 1.93,
+ "learning_rate": 0.00018199694007644277,
+ "loss": 1.9419,
+ "step": 1624
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018197532040038547,
+ "loss": 1.6686,
+ "step": 1625
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018195368903643563,
+ "loss": 2.2525,
+ "step": 1626
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018193204598767744,
+ "loss": 1.8076,
+ "step": 1627
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018191039125719662,
+ "loss": 1.976,
+ "step": 1628
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018188872484808076,
+ "loss": 1.6896,
+ "step": 1629
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018186704676341898,
+ "loss": 1.6784,
+ "step": 1630
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018184535700630213,
+ "loss": 1.9634,
+ "step": 1631
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018182365557982264,
+ "loss": 1.7406,
+ "step": 1632
+ },
+ {
+ "epoch": 1.94,
+ "learning_rate": 0.00018180194248707473,
+ "loss": 1.7492,
+ "step": 1633
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018178021773115414,
+ "loss": 1.7731,
+ "step": 1634
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018175848131515837,
+ "loss": 1.6232,
+ "step": 1635
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817367332421866,
+ "loss": 1.7488,
+ "step": 1636
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.0001817149735153396,
+ "loss": 1.3398,
+ "step": 1637
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018169320213771983,
+ "loss": 1.4521,
+ "step": 1638
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018167141911243145,
+ "loss": 1.6311,
+ "step": 1639
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018164962444258014,
+ "loss": 1.8911,
+ "step": 1640
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 0.00018162781813127346,
+ "loss": 1.9879,
+ "step": 1641
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001816060001816205,
+ "loss": 1.5637,
+ "step": 1642
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018158417059673196,
+ "loss": 1.7461,
+ "step": 1643
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815623293797203,
+ "loss": 1.6671,
+ "step": 1644
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815404765336996,
+ "loss": 1.2124,
+ "step": 1645
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001815186120617856,
+ "loss": 1.6402,
+ "step": 1646
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.0001814967359670957,
+ "loss": 1.8837,
+ "step": 1647
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018147484825274893,
+ "loss": 1.8027,
+ "step": 1648
+ },
+ {
+ "epoch": 1.96,
+ "learning_rate": 0.00018145294892186605,
+ "loss": 1.7684,
+ "step": 1649
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.0001814310379775694,
+ "loss": 1.8274,
+ "step": 1650
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.000181409115422983,
+ "loss": 1.8292,
+ "step": 1651
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018138718126123248,
+ "loss": 1.3492,
+ "step": 1652
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018136523549544523,
+ "loss": 1.509,
+ "step": 1653
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018134327812875024,
+ "loss": 1.7415,
+ "step": 1654
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018132130916427816,
+ "loss": 1.5223,
+ "step": 1655
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018129932860516126,
+ "loss": 1.9294,
+ "step": 1656
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018127733645453348,
+ "loss": 2.0716,
+ "step": 1657
+ },
+ {
+ "epoch": 1.97,
+ "learning_rate": 0.00018125533271553043,
+ "loss": 1.57,
+ "step": 1658
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018123331739128938,
+ "loss": 2.2132,
+ "step": 1659
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018121129048494922,
+ "loss": 1.9006,
+ "step": 1660
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018118925199965048,
+ "loss": 1.9319,
+ "step": 1661
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018116720193853543,
+ "loss": 1.8103,
+ "step": 1662
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018114514030474787,
+ "loss": 1.7028,
+ "step": 1663
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.00018112306710143334,
+ "loss": 1.802,
+ "step": 1664
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.000181100982331739,
+ "loss": 1.6835,
+ "step": 1665
+ },
+ {
+ "epoch": 1.98,
+ "learning_rate": 0.0001810788859988136,
+ "loss": 1.7223,
+ "step": 1666
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810567781058077,
+ "loss": 1.5829,
+ "step": 1667
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018103465865587333,
+ "loss": 1.9863,
+ "step": 1668
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.0001810125276521642,
+ "loss": 1.6398,
+ "step": 1669
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018099038509783582,
+ "loss": 1.9261,
+ "step": 1670
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018096823099604517,
+ "loss": 1.8882,
+ "step": 1671
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018094606534995093,
+ "loss": 1.6716,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "eval_loss": 2.075261354446411,
+ "eval_runtime": 283.9438,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 1672
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018092388816271345,
+ "loss": 1.6688,
+ "step": 1673
+ },
+ {
+ "epoch": 1.99,
+ "learning_rate": 0.00018090169943749476,
+ "loss": 1.9127,
+ "step": 1674
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001808794991774584,
+ "loss": 1.7214,
+ "step": 1675
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018085728738576973,
+ "loss": 1.785,
+ "step": 1676
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018083506406559561,
+ "loss": 1.5287,
+ "step": 1677
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018081282922010464,
+ "loss": 1.9012,
+ "step": 1678
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018079058285246698,
+ "loss": 1.3094,
+ "step": 1679
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807683249658545,
+ "loss": 1.818,
+ "step": 1680
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807460555634407,
+ "loss": 1.9389,
+ "step": 1681
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001807237746484007,
+ "loss": 1.4334,
+ "step": 1682
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018070148222391126,
+ "loss": 1.5422,
+ "step": 1683
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806791782931508,
+ "loss": 1.7899,
+ "step": 1684
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806568628592994,
+ "loss": 1.6106,
+ "step": 1685
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018063453592553872,
+ "loss": 1.9807,
+ "step": 1686
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001806121974950521,
+ "loss": 1.1762,
+ "step": 1687
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018058984757102456,
+ "loss": 1.8338,
+ "step": 1688
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001805674861566426,
+ "loss": 1.5556,
+ "step": 1689
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805451132550946,
+ "loss": 0.87,
+ "step": 1690
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018052272886957038,
+ "loss": 1.0386,
+ "step": 1691
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.0001805003330032615,
+ "loss": 0.8153,
+ "step": 1692
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 0.00018047792565936102,
+ "loss": 1.1745,
+ "step": 1693
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018045550684106388,
+ "loss": 1.1584,
+ "step": 1694
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018043307655156644,
+ "loss": 1.0742,
+ "step": 1695
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018041063479406675,
+ "loss": 1.0537,
+ "step": 1696
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803881815717646,
+ "loss": 1.0239,
+ "step": 1697
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.0001803657168878612,
+ "loss": 0.9182,
+ "step": 1698
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018034324074555965,
+ "loss": 1.1856,
+ "step": 1699
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018032075314806448,
+ "loss": 1.3285,
+ "step": 1700
+ },
+ {
+ "epoch": 2.01,
+ "learning_rate": 0.00018029825409858198,
+ "loss": 1.2912,
+ "step": 1701
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018027574360032,
+ "loss": 1.3666,
+ "step": 1702
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018025322165648807,
+ "loss": 0.9621,
+ "step": 1703
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018023068827029723,
+ "loss": 0.8484,
+ "step": 1704
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018020814344496037,
+ "loss": 1.2236,
+ "step": 1705
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018018558718369186,
+ "loss": 0.8155,
+ "step": 1706
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.0001801630194897077,
+ "loss": 1.2047,
+ "step": 1707
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018014044036622555,
+ "loss": 1.0269,
+ "step": 1708
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00018011784981646474,
+ "loss": 1.0536,
+ "step": 1709
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018009524784364615,
+ "loss": 1.0516,
+ "step": 1710
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018007263445099235,
+ "loss": 0.9087,
+ "step": 1711
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001800500096417275,
+ "loss": 1.3057,
+ "step": 1712
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018002737341907743,
+ "loss": 0.8791,
+ "step": 1713
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00018000472578626956,
+ "loss": 1.1667,
+ "step": 1714
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017998206674653294,
+ "loss": 1.1026,
+ "step": 1715
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017995939630309826,
+ "loss": 1.3228,
+ "step": 1716
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.0001799367144591978,
+ "loss": 0.9173,
+ "step": 1717
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017991402121806557,
+ "loss": 1.0067,
+ "step": 1718
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798913165829371,
+ "loss": 1.0256,
+ "step": 1719
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017986860055704953,
+ "loss": 0.7645,
+ "step": 1720
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.0001798458731436417,
+ "loss": 1.0567,
+ "step": 1721
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017982313434595406,
+ "loss": 0.7465,
+ "step": 1722
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017980038416722863,
+ "loss": 1.3268,
+ "step": 1723
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017977762261070916,
+ "loss": 0.9917,
+ "step": 1724
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017975484967964087,
+ "loss": 0.8592,
+ "step": 1725
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 0.00017973206537727073,
+ "loss": 1.43,
+ "step": 1726
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017970926970684725,
+ "loss": 1.3679,
+ "step": 1727
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017968646267162063,
+ "loss": 1.2959,
+ "step": 1728
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017966364427484267,
+ "loss": 1.0674,
+ "step": 1729
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017964081451976672,
+ "loss": 1.1153,
+ "step": 1730
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017961797340964783,
+ "loss": 1.0586,
+ "step": 1731
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017959512094774266,
+ "loss": 1.2388,
+ "step": 1732
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017957225713730949,
+ "loss": 1.257,
+ "step": 1733
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.0001795493819816081,
+ "loss": 1.099,
+ "step": 1734
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001795264954839001,
+ "loss": 0.9532,
+ "step": 1735
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017950359764744859,
+ "loss": 1.2553,
+ "step": 1736
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017948068847551825,
+ "loss": 0.9973,
+ "step": 1737
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017945776797137543,
+ "loss": 1.0637,
+ "step": 1738
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017943483613828815,
+ "loss": 1.1815,
+ "step": 1739
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017941189297952597,
+ "loss": 0.8378,
+ "step": 1740
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017938893849836002,
+ "loss": 0.9375,
+ "step": 1741
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00017936597269806322,
+ "loss": 0.9653,
+ "step": 1742
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.0001793429955819099,
+ "loss": 1.221,
+ "step": 1743
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017932000715317612,
+ "loss": 1.041,
+ "step": 1744
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017929700741513955,
+ "loss": 1.0724,
+ "step": 1745
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017927399637107945,
+ "loss": 1.1102,
+ "step": 1746
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017925097402427667,
+ "loss": 0.8542,
+ "step": 1747
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001792279403780137,
+ "loss": 1.2339,
+ "step": 1748
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00017920489543557465,
+ "loss": 0.8671,
+ "step": 1749
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791818392002452,
+ "loss": 0.9779,
+ "step": 1750
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001791587716753127,
+ "loss": 1.1242,
+ "step": 1751
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017913569286406603,
+ "loss": 0.9043,
+ "step": 1752
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001791126027697958,
+ "loss": 0.7996,
+ "step": 1753
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017908950139579406,
+ "loss": 0.8602,
+ "step": 1754
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017906638874535462,
+ "loss": 1.0161,
+ "step": 1755
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017904326482177284,
+ "loss": 0.8226,
+ "step": 1756
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017902012962834566,
+ "loss": 1.3885,
+ "step": 1757
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.0001789969831683717,
+ "loss": 1.2158,
+ "step": 1758
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 0.00017897382544515108,
+ "loss": 1.3261,
+ "step": 1759
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017895065646198567,
+ "loss": 1.2144,
+ "step": 1760
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017892747622217875,
+ "loss": 0.9881,
+ "step": 1761
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001789042847290354,
+ "loss": 1.0342,
+ "step": 1762
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017888108198586217,
+ "loss": 0.7883,
+ "step": 1763
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017885786799596732,
+ "loss": 0.9006,
+ "step": 1764
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017883464276266064,
+ "loss": 1.3695,
+ "step": 1765
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001788114062892535,
+ "loss": 1.0303,
+ "step": 1766
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00017878815857905897,
+ "loss": 1.3816,
+ "step": 1767
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.0001787648996353916,
+ "loss": 0.8684,
+ "step": 1768
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017874162946156772,
+ "loss": 1.1157,
+ "step": 1769
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017871834806090501,
+ "loss": 1.0087,
+ "step": 1770
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.000178695055436723,
+ "loss": 0.7173,
+ "step": 1771
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017867175159234265,
+ "loss": 1.4784,
+ "step": 1772
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017864843653108662,
+ "loss": 1.1401,
+ "step": 1773
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786251102562791,
+ "loss": 1.0952,
+ "step": 1774
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.0001786017727712459,
+ "loss": 0.9443,
+ "step": 1775
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00017857842407931445,
+ "loss": 1.0682,
+ "step": 1776
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785550641838138,
+ "loss": 0.9402,
+ "step": 1777
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017853169308807448,
+ "loss": 1.0576,
+ "step": 1778
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001785083107954288,
+ "loss": 1.1425,
+ "step": 1779
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017848491730921046,
+ "loss": 1.1402,
+ "step": 1780
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017846151263275494,
+ "loss": 1.4482,
+ "step": 1781
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017843809676939922,
+ "loss": 0.7765,
+ "step": 1782
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.00017841466972248188,
+ "loss": 1.1478,
+ "step": 1783
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 0.0001783912314953431,
+ "loss": 1.1876,
+ "step": 1784
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017836778209132464,
+ "loss": 1.2036,
+ "step": 1785
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783443215137699,
+ "loss": 1.0297,
+ "step": 1786
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001783208497660239,
+ "loss": 0.8186,
+ "step": 1787
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017829736685143308,
+ "loss": 0.7258,
+ "step": 1788
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017827387277334568,
+ "loss": 0.8072,
+ "step": 1789
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017825036753511144,
+ "loss": 1.0474,
+ "step": 1790
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017822685114008167,
+ "loss": 1.2141,
+ "step": 1791
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00017820332359160928,
+ "loss": 1.1443,
+ "step": 1792
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.0001781797848930488,
+ "loss": 0.9864,
+ "step": 1793
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017815623504775636,
+ "loss": 1.2998,
+ "step": 1794
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001781326740590896,
+ "loss": 1.0672,
+ "step": 1795
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017810910193040785,
+ "loss": 0.9152,
+ "step": 1796
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.000178085518665072,
+ "loss": 1.2555,
+ "step": 1797
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017806192426644444,
+ "loss": 1.2085,
+ "step": 1798
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017803831873788926,
+ "loss": 1.6205,
+ "step": 1799
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.0001780147020827721,
+ "loss": 1.3382,
+ "step": 1800
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 0.00017799107430446016,
+ "loss": 1.3309,
+ "step": 1801
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017796743540632223,
+ "loss": 1.2556,
+ "step": 1802
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017794378539172877,
+ "loss": 0.829,
+ "step": 1803
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017792012426405166,
+ "loss": 1.1711,
+ "step": 1804
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017789645202666456,
+ "loss": 1.0128,
+ "step": 1805
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017787276868294253,
+ "loss": 1.2074,
+ "step": 1806
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.00017784907423626237,
+ "loss": 1.0996,
+ "step": 1807
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778253686900023,
+ "loss": 0.9608,
+ "step": 1808
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001778016520475423,
+ "loss": 0.827,
+ "step": 1809
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017777792431226383,
+ "loss": 1.2365,
+ "step": 1810
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017775418548754993,
+ "loss": 1.0276,
+ "step": 1811
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777304355767852,
+ "loss": 0.8178,
+ "step": 1812
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001777066745833559,
+ "loss": 1.1297,
+ "step": 1813
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017768290251064987,
+ "loss": 1.1737,
+ "step": 1814
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017765911936205644,
+ "loss": 1.1606,
+ "step": 1815
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017763532514096658,
+ "loss": 1.2605,
+ "step": 1816
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001776115198507728,
+ "loss": 1.2271,
+ "step": 1817
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.00017758770349486923,
+ "loss": 0.9407,
+ "step": 1818
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001775638760766516,
+ "loss": 1.0273,
+ "step": 1819
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017754003759951715,
+ "loss": 1.0746,
+ "step": 1820
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017751618806686472,
+ "loss": 1.0091,
+ "step": 1821
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017749232748209473,
+ "loss": 0.997,
+ "step": 1822
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.0001774684558486092,
+ "loss": 1.4814,
+ "step": 1823
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017744457316981168,
+ "loss": 1.1407,
+ "step": 1824
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017742067944910737,
+ "loss": 0.9824,
+ "step": 1825
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00017739677468990293,
+ "loss": 1.2603,
+ "step": 1826
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017737285889560668,
+ "loss": 1.3721,
+ "step": 1827
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017734893206962853,
+ "loss": 1.1186,
+ "step": 1828
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017732499421537984,
+ "loss": 0.7693,
+ "step": 1829
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001773010453362737,
+ "loss": 1.0449,
+ "step": 1830
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017727708543572467,
+ "loss": 0.9331,
+ "step": 1831
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0001772531145171489,
+ "loss": 0.739,
+ "step": 1832
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017722913258396417,
+ "loss": 0.9076,
+ "step": 1833
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.00017720513963958968,
+ "loss": 1.3464,
+ "step": 1834
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017718113568744638,
+ "loss": 0.8858,
+ "step": 1835
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017715712073095672,
+ "loss": 1.3204,
+ "step": 1836
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017713309477354467,
+ "loss": 1.0538,
+ "step": 1837
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001771090578186358,
+ "loss": 1.44,
+ "step": 1838
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770850098696573,
+ "loss": 1.0167,
+ "step": 1839
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017706095093003785,
+ "loss": 0.9724,
+ "step": 1840
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017703688100320774,
+ "loss": 0.8055,
+ "step": 1841
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.0001770128000925988,
+ "loss": 0.7363,
+ "step": 1842
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00017698870820164446,
+ "loss": 1.1329,
+ "step": 1843
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017696460533377968,
+ "loss": 0.9487,
+ "step": 1844
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017694049149244104,
+ "loss": 1.2571,
+ "step": 1845
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001769163666810666,
+ "loss": 0.9148,
+ "step": 1846
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017689223090309607,
+ "loss": 1.4676,
+ "step": 1847
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017686808416197072,
+ "loss": 0.9395,
+ "step": 1848
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017684392646113325,
+ "loss": 0.9632,
+ "step": 1849
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.00017681975780402807,
+ "loss": 1.0037,
+ "step": 1850
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001767955781941011,
+ "loss": 0.9557,
+ "step": 1851
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017677138763479985,
+ "loss": 1.2799,
+ "step": 1852
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017674718612957336,
+ "loss": 0.8461,
+ "step": 1853
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001767229736818722,
+ "loss": 1.2762,
+ "step": 1854
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017669875029514856,
+ "loss": 1.4801,
+ "step": 1855
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017667451597285617,
+ "loss": 0.9849,
+ "step": 1856
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766502707184503,
+ "loss": 1.0875,
+ "step": 1857
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.00017662601453538783,
+ "loss": 0.8346,
+ "step": 1858
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 0.0001766017474271271,
+ "loss": 1.1933,
+ "step": 1859
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017657746939712815,
+ "loss": 0.8789,
+ "step": 1860
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017655318044885245,
+ "loss": 1.0091,
+ "step": 1861
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0001765288805857631,
+ "loss": 0.7371,
+ "step": 1862
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017650456981132466,
+ "loss": 0.8131,
+ "step": 1863
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017648024812900342,
+ "loss": 1.0795,
+ "step": 1864
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.000176455915542267,
+ "loss": 0.9882,
+ "step": 1865
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017643157205458483,
+ "loss": 1.212,
+ "step": 1866
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017640721766942768,
+ "loss": 1.4755,
+ "step": 1867
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.00017638285239026798,
+ "loss": 1.0391,
+ "step": 1868
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017635847622057965,
+ "loss": 1.2568,
+ "step": 1869
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017633408916383826,
+ "loss": 1.2138,
+ "step": 1870
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001763096912235208,
+ "loss": 1.196,
+ "step": 1871
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017628528240310596,
+ "loss": 1.1476,
+ "step": 1872
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017626086270607384,
+ "loss": 1.1421,
+ "step": 1873
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017623643213590619,
+ "loss": 1.0711,
+ "step": 1874
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.0001762119906960863,
+ "loss": 0.8842,
+ "step": 1875
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00017618753839009893,
+ "loss": 0.798,
+ "step": 1876
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761630752214305,
+ "loss": 0.8591,
+ "step": 1877
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017613860119356883,
+ "loss": 0.7646,
+ "step": 1878
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001761141163100035,
+ "loss": 1.4113,
+ "step": 1879
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017608962057422549,
+ "loss": 0.8605,
+ "step": 1880
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017606511398972731,
+ "loss": 0.6179,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "eval_loss": 2.3971996307373047,
+ "eval_runtime": 283.7444,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 1881
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760405965600031,
+ "loss": 0.8651,
+ "step": 1882
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.0001760160682885485,
+ "loss": 1.3178,
+ "step": 1883
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 0.00017599152917886071,
+ "loss": 0.9233,
+ "step": 1884
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017596697923443847,
+ "loss": 0.9126,
+ "step": 1885
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001759424184587821,
+ "loss": 0.9749,
+ "step": 1886
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017591784685539334,
+ "loss": 1.1929,
+ "step": 1887
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017589326442777565,
+ "loss": 1.2026,
+ "step": 1888
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017586867117943392,
+ "loss": 1.1162,
+ "step": 1889
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017584406711387463,
+ "loss": 0.9818,
+ "step": 1890
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001758194522346057,
+ "loss": 0.9802,
+ "step": 1891
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001757948265451368,
+ "loss": 0.8963,
+ "step": 1892
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.00017577019004897897,
+ "loss": 1.0359,
+ "step": 1893
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017574554274964478,
+ "loss": 1.0788,
+ "step": 1894
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017572088465064848,
+ "loss": 0.9415,
+ "step": 1895
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756962157555057,
+ "loss": 1.0944,
+ "step": 1896
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017567153606773373,
+ "loss": 1.357,
+ "step": 1897
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.00017564684559085136,
+ "loss": 1.0108,
+ "step": 1898
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001756221443283789,
+ "loss": 0.5337,
+ "step": 1899
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755974322838382,
+ "loss": 1.4234,
+ "step": 1900
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 0.0001755727094607527,
+ "loss": 0.9083,
+ "step": 1901
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017554797586264727,
+ "loss": 0.9199,
+ "step": 1902
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017552323149304844,
+ "loss": 1.1885,
+ "step": 1903
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754984763554842,
+ "loss": 1.276,
+ "step": 1904
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754737104534841,
+ "loss": 0.8882,
+ "step": 1905
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017544893379057918,
+ "loss": 0.993,
+ "step": 1906
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.0001754241463703021,
+ "loss": 1.261,
+ "step": 1907
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017539934819618696,
+ "loss": 0.9877,
+ "step": 1908
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00017537453927176947,
+ "loss": 0.9991,
+ "step": 1909
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017534971960058685,
+ "loss": 1.2012,
+ "step": 1910
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001753248891861778,
+ "loss": 0.864,
+ "step": 1911
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017530004803208263,
+ "loss": 1.0382,
+ "step": 1912
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017527519614184316,
+ "loss": 1.068,
+ "step": 1913
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.00017525033351900268,
+ "loss": 0.8687,
+ "step": 1914
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752254601671061,
+ "loss": 1.1174,
+ "step": 1915
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001752005760896998,
+ "loss": 1.269,
+ "step": 1916
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751756812903317,
+ "loss": 0.7387,
+ "step": 1917
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 0.0001751507757725513,
+ "loss": 0.8484,
+ "step": 1918
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001751258595399095,
+ "loss": 1.0092,
+ "step": 1919
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017510093259595885,
+ "loss": 1.0145,
+ "step": 1920
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017507599494425344,
+ "loss": 1.2969,
+ "step": 1921
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017505104658834875,
+ "loss": 0.7925,
+ "step": 1922
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017502608753180196,
+ "loss": 0.8974,
+ "step": 1923
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017500111777817164,
+ "loss": 0.764,
+ "step": 1924
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.0001749761373310179,
+ "loss": 1.1057,
+ "step": 1925
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00017495114619390246,
+ "loss": 0.8092,
+ "step": 1926
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017492614437038845,
+ "loss": 0.9553,
+ "step": 1927
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017490113186404067,
+ "loss": 1.0278,
+ "step": 1928
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748761086784253,
+ "loss": 1.2152,
+ "step": 1929
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017485107481711012,
+ "loss": 1.5154,
+ "step": 1930
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001748260302836644,
+ "loss": 1.1973,
+ "step": 1931
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00017480097508165896,
+ "loss": 0.9429,
+ "step": 1932
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747759092146661,
+ "loss": 1.5453,
+ "step": 1933
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.0001747508326862597,
+ "loss": 1.1691,
+ "step": 1934
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017472574550001508,
+ "loss": 1.2094,
+ "step": 1935
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017470064765950918,
+ "loss": 1.0777,
+ "step": 1936
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017467553916832035,
+ "loss": 1.0883,
+ "step": 1937
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017465042003002857,
+ "loss": 0.9297,
+ "step": 1938
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017462529024821522,
+ "loss": 0.7814,
+ "step": 1939
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017460014982646334,
+ "loss": 1.3645,
+ "step": 1940
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.0001745749987683573,
+ "loss": 1.0604,
+ "step": 1941
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017454983707748317,
+ "loss": 0.9416,
+ "step": 1942
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 0.00017452466475742845,
+ "loss": 1.4187,
+ "step": 1943
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017449948181178215,
+ "loss": 1.1619,
+ "step": 1944
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017447428824413482,
+ "loss": 1.1381,
+ "step": 1945
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017444908405807845,
+ "loss": 1.2304,
+ "step": 1946
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.0001744238692572067,
+ "loss": 1.2149,
+ "step": 1947
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017439864384511463,
+ "loss": 0.8172,
+ "step": 1948
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017437340782539877,
+ "loss": 1.0783,
+ "step": 1949
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017434816120165728,
+ "loss": 1.0661,
+ "step": 1950
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00017432290397748982,
+ "loss": 1.1959,
+ "step": 1951
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001742976361564974,
+ "loss": 1.0581,
+ "step": 1952
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017427235774228274,
+ "loss": 0.8948,
+ "step": 1953
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017424706873845,
+ "loss": 1.2565,
+ "step": 1954
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017422176914860476,
+ "loss": 0.9237,
+ "step": 1955
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017419645897635432,
+ "loss": 1.219,
+ "step": 1956
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017417113822530727,
+ "loss": 1.4606,
+ "step": 1957
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00017414580689907377,
+ "loss": 0.714,
+ "step": 1958
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.0001741204650012656,
+ "loss": 1.2223,
+ "step": 1959
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017409511253549593,
+ "loss": 0.9828,
+ "step": 1960
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017406974950537942,
+ "loss": 0.9954,
+ "step": 1961
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017404437591453235,
+ "loss": 1.0307,
+ "step": 1962
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001740189917665724,
+ "loss": 0.9331,
+ "step": 1963
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739935970651188,
+ "loss": 1.3517,
+ "step": 1964
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017396819181379232,
+ "loss": 1.2024,
+ "step": 1965
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001739427760162151,
+ "loss": 0.9696,
+ "step": 1966
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00017391734967601102,
+ "loss": 1.1559,
+ "step": 1967
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.0001738919127968052,
+ "loss": 1.3104,
+ "step": 1968
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017386646538222443,
+ "loss": 0.9073,
+ "step": 1969
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017384100743589697,
+ "loss": 1.0539,
+ "step": 1970
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017381553896145255,
+ "loss": 0.9873,
+ "step": 1971
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737900599625224,
+ "loss": 0.9466,
+ "step": 1972
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737645704427393,
+ "loss": 1.0639,
+ "step": 1973
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.0001737390704057375,
+ "loss": 0.5843,
+ "step": 1974
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017371355985515275,
+ "loss": 1.1318,
+ "step": 1975
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00017368803879462227,
+ "loss": 1.0116,
+ "step": 1976
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001736625072277848,
+ "loss": 0.8845,
+ "step": 1977
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017363696515828062,
+ "loss": 0.8081,
+ "step": 1978
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017361141258975148,
+ "loss": 0.8795,
+ "step": 1979
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735858495258406,
+ "loss": 0.9725,
+ "step": 1980
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735602759701927,
+ "loss": 1.0164,
+ "step": 1981
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017353469192645405,
+ "loss": 1.2937,
+ "step": 1982
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.0001735090973982723,
+ "loss": 1.0842,
+ "step": 1983
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 0.00017348349238929678,
+ "loss": 1.0043,
+ "step": 1984
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017345787690317815,
+ "loss": 1.1302,
+ "step": 1985
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017343225094356855,
+ "loss": 1.195,
+ "step": 1986
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017340661451412183,
+ "loss": 1.1449,
+ "step": 1987
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017338096761849309,
+ "loss": 1.2244,
+ "step": 1988
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017335531026033897,
+ "loss": 0.9273,
+ "step": 1989
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017332964244331776,
+ "loss": 1.0448,
+ "step": 1990
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017330396417108908,
+ "loss": 1.0074,
+ "step": 1991
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00017327827544731412,
+ "loss": 0.9284,
+ "step": 1992
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.0001732525762756555,
+ "loss": 1.0307,
+ "step": 1993
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017322686665977737,
+ "loss": 1.1526,
+ "step": 1994
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017320114660334535,
+ "loss": 0.819,
+ "step": 1995
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017317541611002656,
+ "loss": 1.1029,
+ "step": 1996
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017314967518348962,
+ "loss": 1.2471,
+ "step": 1997
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017312392382740462,
+ "loss": 1.0156,
+ "step": 1998
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017309816204544317,
+ "loss": 1.1843,
+ "step": 1999
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017307238984127832,
+ "loss": 1.1588,
+ "step": 2000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 0.00017304660721858457,
+ "loss": 1.0157,
+ "step": 2001
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.000173020814181038,
+ "loss": 1.0563,
+ "step": 2002
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017299501073231622,
+ "loss": 1.1883,
+ "step": 2003
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017296919687609808,
+ "loss": 0.9404,
+ "step": 2004
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017294337261606417,
+ "loss": 1.2495,
+ "step": 2005
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017291753795589643,
+ "loss": 1.0074,
+ "step": 2006
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017289169289927837,
+ "loss": 1.1411,
+ "step": 2007
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017286583744989488,
+ "loss": 0.9942,
+ "step": 2008
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00017283997161143239,
+ "loss": 0.952,
+ "step": 2009
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017281409538757883,
+ "loss": 1.2966,
+ "step": 2010
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017278820878202357,
+ "loss": 1.0836,
+ "step": 2011
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727623117984575,
+ "loss": 1.0984,
+ "step": 2012
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.0001727364044405729,
+ "loss": 0.8822,
+ "step": 2013
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017271048671206366,
+ "loss": 1.2014,
+ "step": 2014
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017268455861662503,
+ "loss": 1.1779,
+ "step": 2015
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017265862015795384,
+ "loss": 0.9966,
+ "step": 2016
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017263267133974832,
+ "loss": 0.9536,
+ "step": 2017
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 0.00017260671216570822,
+ "loss": 0.811,
+ "step": 2018
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017258074263953472,
+ "loss": 0.8241,
+ "step": 2019
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017255476276493056,
+ "loss": 1.1263,
+ "step": 2020
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017252877254559986,
+ "loss": 0.995,
+ "step": 2021
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001725027719852483,
+ "loss": 1.1481,
+ "step": 2022
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.0001724767610875829,
+ "loss": 1.129,
+ "step": 2023
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017245073985631238,
+ "loss": 0.5928,
+ "step": 2024
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017242470829514672,
+ "loss": 0.8326,
+ "step": 2025
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00017239866640779745,
+ "loss": 1.1092,
+ "step": 2026
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017237261419797756,
+ "loss": 1.5015,
+ "step": 2027
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001723465516694016,
+ "loss": 0.9775,
+ "step": 2028
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017232047882578548,
+ "loss": 0.9348,
+ "step": 2029
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001722943956708466,
+ "loss": 0.6199,
+ "step": 2030
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017226830220830384,
+ "loss": 1.1485,
+ "step": 2031
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017224219844187764,
+ "loss": 1.1195,
+ "step": 2032
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017221608437528973,
+ "loss": 1.0528,
+ "step": 2033
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.00017218996001226345,
+ "loss": 1.1058,
+ "step": 2034
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017216382535652355,
+ "loss": 1.1451,
+ "step": 2035
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001721376804117963,
+ "loss": 1.2251,
+ "step": 2036
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017211152518180936,
+ "loss": 1.0708,
+ "step": 2037
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017208535967029188,
+ "loss": 1.0746,
+ "step": 2038
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017205918388097456,
+ "loss": 1.3262,
+ "step": 2039
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017203299781758943,
+ "loss": 0.7619,
+ "step": 2040
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017200680148387007,
+ "loss": 1.01,
+ "step": 2041
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.0001719805948835515,
+ "loss": 1.1651,
+ "step": 2042
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 0.00017195437802037026,
+ "loss": 1.4671,
+ "step": 2043
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017192815089806424,
+ "loss": 0.9857,
+ "step": 2044
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001719019135203729,
+ "loss": 1.2613,
+ "step": 2045
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017187566589103704,
+ "loss": 1.4386,
+ "step": 2046
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.0001718494080137991,
+ "loss": 1.0965,
+ "step": 2047
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017182313989240285,
+ "loss": 0.752,
+ "step": 2048
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017179686153059352,
+ "loss": 0.9126,
+ "step": 2049
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.00017177057293211784,
+ "loss": 1.5075,
+ "step": 2050
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 0.000171744274100724,
+ "loss": 1.0407,
+ "step": 2051
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017171796504016166,
+ "loss": 0.8263,
+ "step": 2052
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.0001716916457541819,
+ "loss": 0.9453,
+ "step": 2053
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017166531624653722,
+ "loss": 0.9777,
+ "step": 2054
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017163897652098172,
+ "loss": 1.2129,
+ "step": 2055
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017161262658127086,
+ "loss": 1.3642,
+ "step": 2056
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017158626643116152,
+ "loss": 0.6798,
+ "step": 2057
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017155989607441213,
+ "loss": 0.874,
+ "step": 2058
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 0.00017153351551478247,
+ "loss": 1.0636,
+ "step": 2059
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001715071247560339,
+ "loss": 1.0563,
+ "step": 2060
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714807238019291,
+ "loss": 1.1984,
+ "step": 2061
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017145431265623234,
+ "loss": 0.9444,
+ "step": 2062
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001714278913227092,
+ "loss": 0.7809,
+ "step": 2063
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017140145980512684,
+ "loss": 1.649,
+ "step": 2064
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713750181072538,
+ "loss": 1.0956,
+ "step": 2065
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.0001713485662328601,
+ "loss": 1.2845,
+ "step": 2066
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017132210418571714,
+ "loss": 1.0484,
+ "step": 2067
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 0.00017129563196959793,
+ "loss": 1.0291,
+ "step": 2068
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017126914958827679,
+ "loss": 1.1226,
+ "step": 2069
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.0001712426570455295,
+ "loss": 1.0119,
+ "step": 2070
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017121615434513332,
+ "loss": 1.1663,
+ "step": 2071
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.000171189641490867,
+ "loss": 1.1353,
+ "step": 2072
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017116311848651064,
+ "loss": 1.0761,
+ "step": 2073
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017113658533584594,
+ "loss": 1.1978,
+ "step": 2074
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017111004204265582,
+ "loss": 1.3881,
+ "step": 2075
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.00017108348861072484,
+ "loss": 1.3945,
+ "step": 2076
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017105692504383897,
+ "loss": 1.3796,
+ "step": 2077
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017103035134578555,
+ "loss": 1.1721,
+ "step": 2078
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001710037675203534,
+ "loss": 1.0061,
+ "step": 2079
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017097717357133284,
+ "loss": 1.2456,
+ "step": 2080
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017095056950251555,
+ "loss": 0.788,
+ "step": 2081
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001709239553176947,
+ "loss": 1.16,
+ "step": 2082
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.0001708973310206649,
+ "loss": 1.0498,
+ "step": 2083
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 0.00017087069661522218,
+ "loss": 0.8993,
+ "step": 2084
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017084405210516406,
+ "loss": 1.2088,
+ "step": 2085
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001708173974942894,
+ "loss": 1.0897,
+ "step": 2086
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017079073278639863,
+ "loss": 1.2718,
+ "step": 2087
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017076405798529355,
+ "loss": 1.2325,
+ "step": 2088
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017073737309477736,
+ "loss": 1.0555,
+ "step": 2089
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.00017071067811865476,
+ "loss": 1.1428,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 2.3191208839416504,
+ "eval_runtime": 284.1375,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2090
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706839730607319,
+ "loss": 1.0908,
+ "step": 2091
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0001706572579248163,
+ "loss": 1.2092,
+ "step": 2092
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.000170630532714717,
+ "loss": 1.1735,
+ "step": 2093
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001706037974342444,
+ "loss": 1.2716,
+ "step": 2094
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017057705208721035,
+ "loss": 1.0095,
+ "step": 2095
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001705502966774282,
+ "loss": 1.3059,
+ "step": 2096
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017052353120871266,
+ "loss": 0.8269,
+ "step": 2097
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.0001704967556848799,
+ "loss": 1.0615,
+ "step": 2098
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017046997010974755,
+ "loss": 1.2709,
+ "step": 2099
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017044317448713461,
+ "loss": 1.1633,
+ "step": 2100
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 0.00017041636882086158,
+ "loss": 0.9273,
+ "step": 2101
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017038955311475038,
+ "loss": 1.3117,
+ "step": 2102
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001703627273726243,
+ "loss": 0.8883,
+ "step": 2103
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017033589159830815,
+ "loss": 1.1371,
+ "step": 2104
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017030904579562806,
+ "loss": 1.5402,
+ "step": 2105
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017028218996841172,
+ "loss": 0.9156,
+ "step": 2106
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017025532412048817,
+ "loss": 1.0962,
+ "step": 2107
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0001702284482556879,
+ "loss": 0.9402,
+ "step": 2108
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.00017020156237784279,
+ "loss": 0.8146,
+ "step": 2109
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001701746664907862,
+ "loss": 1.1718,
+ "step": 2110
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017014776059835288,
+ "loss": 1.0618,
+ "step": 2111
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017012084470437907,
+ "loss": 1.4796,
+ "step": 2112
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017009391881270237,
+ "loss": 0.8402,
+ "step": 2113
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017006698292716178,
+ "loss": 1.1641,
+ "step": 2114
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0001700400370515978,
+ "loss": 1.241,
+ "step": 2115
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00017001308118985237,
+ "loss": 0.8683,
+ "step": 2116
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.00016998611534576873,
+ "loss": 1.2697,
+ "step": 2117
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016995913952319168,
+ "loss": 0.9233,
+ "step": 2118
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016993215372596737,
+ "loss": 1.2472,
+ "step": 2119
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016990515795794334,
+ "loss": 1.2541,
+ "step": 2120
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016987815222296865,
+ "loss": 1.0016,
+ "step": 2121
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016985113652489374,
+ "loss": 1.0678,
+ "step": 2122
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016982411086757037,
+ "loss": 1.6066,
+ "step": 2123
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016979707525485192,
+ "loss": 1.229,
+ "step": 2124
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016977002969059302,
+ "loss": 0.752,
+ "step": 2125
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 0.00016974297417864977,
+ "loss": 0.8752,
+ "step": 2126
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001697159087228797,
+ "loss": 0.8896,
+ "step": 2127
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016968883332714186,
+ "loss": 0.9657,
+ "step": 2128
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696617479952964,
+ "loss": 1.3657,
+ "step": 2129
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0001696346527312053,
+ "loss": 0.9876,
+ "step": 2130
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016960754753873162,
+ "loss": 1.0165,
+ "step": 2131
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016958043242174003,
+ "loss": 1.625,
+ "step": 2132
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016955330738409655,
+ "loss": 1.5502,
+ "step": 2133
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.00016952617242966864,
+ "loss": 1.0793,
+ "step": 2134
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016949902756232507,
+ "loss": 1.4425,
+ "step": 2135
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016947187278593622,
+ "loss": 1.3124,
+ "step": 2136
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016944470810437365,
+ "loss": 0.927,
+ "step": 2137
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016941753352151055,
+ "loss": 1.1911,
+ "step": 2138
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016939034904122138,
+ "loss": 1.0768,
+ "step": 2139
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016936315466738205,
+ "loss": 1.1277,
+ "step": 2140
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.00016933595040386984,
+ "loss": 0.812,
+ "step": 2141
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.0001693087362545636,
+ "loss": 0.8299,
+ "step": 2142
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016928151222334338,
+ "loss": 1.1125,
+ "step": 2143
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016925427831409077,
+ "loss": 1.1835,
+ "step": 2144
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016922703453068873,
+ "loss": 1.2007,
+ "step": 2145
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016919978087702163,
+ "loss": 0.8524,
+ "step": 2146
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016917251735697523,
+ "loss": 0.9497,
+ "step": 2147
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016914524397443673,
+ "loss": 1.1004,
+ "step": 2148
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016911796073329466,
+ "loss": 0.8347,
+ "step": 2149
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016909066763743912,
+ "loss": 0.9492,
+ "step": 2150
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 0.00016906336469076148,
+ "loss": 1.1406,
+ "step": 2151
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016903605189715447,
+ "loss": 1.0137,
+ "step": 2152
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001690087292605124,
+ "loss": 1.0624,
+ "step": 2153
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016898139678473076,
+ "loss": 1.1767,
+ "step": 2154
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0001689540544737067,
+ "loss": 1.4184,
+ "step": 2155
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016892670233133856,
+ "loss": 0.957,
+ "step": 2156
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016889934036152618,
+ "loss": 1.0399,
+ "step": 2157
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016887196856817073,
+ "loss": 1.2009,
+ "step": 2158
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.00016884458695517495,
+ "loss": 1.3977,
+ "step": 2159
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016881719552644273,
+ "loss": 1.1328,
+ "step": 2160
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016878979428587955,
+ "loss": 1.5007,
+ "step": 2161
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016876238323739221,
+ "loss": 1.1248,
+ "step": 2162
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016873496238488899,
+ "loss": 1.0358,
+ "step": 2163
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016870753173227945,
+ "loss": 1.2961,
+ "step": 2164
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016868009128347459,
+ "loss": 0.9435,
+ "step": 2165
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016865264104238683,
+ "loss": 0.9642,
+ "step": 2166
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 0.00016862518101293,
+ "loss": 1.0169,
+ "step": 2167
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016859771119901929,
+ "loss": 1.0904,
+ "step": 2168
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.0001685702316045713,
+ "loss": 1.3178,
+ "step": 2169
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016854274223350397,
+ "loss": 1.1395,
+ "step": 2170
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016851524308973678,
+ "loss": 1.1207,
+ "step": 2171
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016848773417719044,
+ "loss": 1.3544,
+ "step": 2172
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016846021549978715,
+ "loss": 1.3503,
+ "step": 2173
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016843268706145042,
+ "loss": 1.4276,
+ "step": 2174
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016840514886610529,
+ "loss": 0.9888,
+ "step": 2175
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.00016837760091767802,
+ "loss": 1.0913,
+ "step": 2176
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001683500432200964,
+ "loss": 1.4781,
+ "step": 2177
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016832247577728955,
+ "loss": 1.2657,
+ "step": 2178
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.000168294898593188,
+ "loss": 0.9206,
+ "step": 2179
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682673116717236,
+ "loss": 0.9218,
+ "step": 2180
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001682397150168297,
+ "loss": 1.2719,
+ "step": 2181
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016821210863244096,
+ "loss": 0.984,
+ "step": 2182
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.00016818449252249345,
+ "loss": 1.4641,
+ "step": 2183
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0001681568666909246,
+ "loss": 1.2571,
+ "step": 2184
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016812923114167328,
+ "loss": 1.2025,
+ "step": 2185
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016810158587867973,
+ "loss": 0.9621,
+ "step": 2186
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016807393090588553,
+ "loss": 1.0016,
+ "step": 2187
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016804626622723368,
+ "loss": 1.031,
+ "step": 2188
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016801859184666857,
+ "loss": 0.7573,
+ "step": 2189
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016799090776813597,
+ "loss": 1.2694,
+ "step": 2190
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.000167963213995583,
+ "loss": 1.196,
+ "step": 2191
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 0.00016793551053295822,
+ "loss": 0.8754,
+ "step": 2192
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016790779738421152,
+ "loss": 1.1743,
+ "step": 2193
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678800745532942,
+ "loss": 1.0921,
+ "step": 2194
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016785234204415888,
+ "loss": 0.8778,
+ "step": 2195
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001678245998607597,
+ "loss": 1.0528,
+ "step": 2196
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016779684800705203,
+ "loss": 1.0255,
+ "step": 2197
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001677690864869927,
+ "loss": 0.6344,
+ "step": 2198
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016774131530453992,
+ "loss": 0.8691,
+ "step": 2199
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.00016771353446365318,
+ "loss": 1.2061,
+ "step": 2200
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0001676857439682935,
+ "loss": 1.1759,
+ "step": 2201
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016765794382242314,
+ "loss": 1.1118,
+ "step": 2202
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016763013403000584,
+ "loss": 1.3005,
+ "step": 2203
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016760231459500666,
+ "loss": 1.0415,
+ "step": 2204
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.000167574485521392,
+ "loss": 0.824,
+ "step": 2205
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016754664681312975,
+ "loss": 0.6682,
+ "step": 2206
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016751879847418905,
+ "loss": 1.9204,
+ "step": 2207
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016749094050854047,
+ "loss": 0.9931,
+ "step": 2208
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 0.00016746307292015602,
+ "loss": 0.8898,
+ "step": 2209
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016743519571300888,
+ "loss": 1.3337,
+ "step": 2210
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016740730889107383,
+ "loss": 1.2947,
+ "step": 2211
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673794124583269,
+ "loss": 1.1882,
+ "step": 2212
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001673515064187455,
+ "loss": 1.5408,
+ "step": 2213
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016732359077630847,
+ "loss": 1.1273,
+ "step": 2214
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0001672956655349959,
+ "loss": 0.8954,
+ "step": 2215
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016726773069878934,
+ "loss": 1.1747,
+ "step": 2216
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.00016723978627167173,
+ "loss": 0.807,
+ "step": 2217
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016721183225762727,
+ "loss": 1.2512,
+ "step": 2218
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016718386866064166,
+ "loss": 1.0796,
+ "step": 2219
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016715589548470185,
+ "loss": 1.0905,
+ "step": 2220
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016712791273379622,
+ "loss": 1.3779,
+ "step": 2221
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016709992041191452,
+ "loss": 1.2015,
+ "step": 2222
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016707191852304782,
+ "loss": 0.8612,
+ "step": 2223
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001670439070711886,
+ "loss": 1.1819,
+ "step": 2224
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.00016701588606033064,
+ "loss": 1.2715,
+ "step": 2225
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0001669878554944692,
+ "loss": 1.3681,
+ "step": 2226
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016695981537760072,
+ "loss": 1.1254,
+ "step": 2227
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669317657137232,
+ "loss": 0.9476,
+ "step": 2228
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.0001669037065068359,
+ "loss": 1.235,
+ "step": 2229
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016687563776093941,
+ "loss": 0.7356,
+ "step": 2230
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016684755948003573,
+ "loss": 0.7901,
+ "step": 2231
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016681947166812824,
+ "loss": 1.317,
+ "step": 2232
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016679137432922163,
+ "loss": 0.8832,
+ "step": 2233
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 0.00016676326746732195,
+ "loss": 1.2776,
+ "step": 2234
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016673515108643665,
+ "loss": 1.0435,
+ "step": 2235
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001667070251905745,
+ "loss": 1.0957,
+ "step": 2236
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016667888978374567,
+ "loss": 1.0862,
+ "step": 2237
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016665074486996165,
+ "loss": 1.1112,
+ "step": 2238
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0001666225904532352,
+ "loss": 1.3633,
+ "step": 2239
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016659442653758064,
+ "loss": 1.444,
+ "step": 2240
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016656625312701348,
+ "loss": 0.8248,
+ "step": 2241
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.00016653807022555067,
+ "loss": 1.2522,
+ "step": 2242
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001665098778372104,
+ "loss": 1.2107,
+ "step": 2243
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001664816759660124,
+ "loss": 1.0813,
+ "step": 2244
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016645346461597753,
+ "loss": 1.1136,
+ "step": 2245
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016642524379112817,
+ "loss": 1.1003,
+ "step": 2246
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.000166397013495488,
+ "loss": 1.0635,
+ "step": 2247
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016636877373308204,
+ "loss": 1.0575,
+ "step": 2248
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016634052450793663,
+ "loss": 0.7693,
+ "step": 2249
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.00016631226582407952,
+ "loss": 1.5965,
+ "step": 2250
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 0.0001662839976855398,
+ "loss": 1.0989,
+ "step": 2251
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016625572009634787,
+ "loss": 0.9198,
+ "step": 2252
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016622743306053548,
+ "loss": 1.0896,
+ "step": 2253
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016619913658213578,
+ "loss": 1.015,
+ "step": 2254
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.0001661708306651832,
+ "loss": 0.8572,
+ "step": 2255
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016614251531371353,
+ "loss": 1.1508,
+ "step": 2256
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.000166114190531764,
+ "loss": 1.1852,
+ "step": 2257
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016608585632337306,
+ "loss": 0.932,
+ "step": 2258
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.00016605751269258053,
+ "loss": 1.2542,
+ "step": 2259
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016602915964342757,
+ "loss": 0.943,
+ "step": 2260
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016600079717995678,
+ "loss": 1.2438,
+ "step": 2261
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016597242530621203,
+ "loss": 0.9928,
+ "step": 2262
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016594404402623845,
+ "loss": 0.9516,
+ "step": 2263
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016591565334408265,
+ "loss": 1.1689,
+ "step": 2264
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.0001658872532637925,
+ "loss": 1.3155,
+ "step": 2265
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016585884378941725,
+ "loss": 1.1596,
+ "step": 2266
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.00016583042492500746,
+ "loss": 0.9956,
+ "step": 2267
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016580199667461508,
+ "loss": 0.9289,
+ "step": 2268
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016577355904229325,
+ "loss": 1.3225,
+ "step": 2269
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016574511203209667,
+ "loss": 1.0384,
+ "step": 2270
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001657166556480812,
+ "loss": 0.697,
+ "step": 2271
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016568818989430416,
+ "loss": 0.7702,
+ "step": 2272
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016565971477482404,
+ "loss": 1.1041,
+ "step": 2273
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.00016563123029370093,
+ "loss": 1.0462,
+ "step": 2274
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001656027364549959,
+ "loss": 1.0797,
+ "step": 2275
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 0.0001655742332627717,
+ "loss": 1.3301,
+ "step": 2276
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.0001655457207210922,
+ "loss": 1.0467,
+ "step": 2277
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016551719883402271,
+ "loss": 0.9432,
+ "step": 2278
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016548866760562978,
+ "loss": 1.1808,
+ "step": 2279
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016546012703998138,
+ "loss": 1.1094,
+ "step": 2280
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016543157714114673,
+ "loss": 1.3914,
+ "step": 2281
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016540301791319645,
+ "loss": 1.0402,
+ "step": 2282
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.00016537444936020246,
+ "loss": 0.9815,
+ "step": 2283
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.000165345871486238,
+ "loss": 0.9722,
+ "step": 2284
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016531728429537766,
+ "loss": 0.919,
+ "step": 2285
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016528868779169738,
+ "loss": 1.1242,
+ "step": 2286
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016526008197927436,
+ "loss": 1.1794,
+ "step": 2287
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016523146686218718,
+ "loss": 1.434,
+ "step": 2288
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016520284244451574,
+ "loss": 0.8463,
+ "step": 2289
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016517420873034123,
+ "loss": 1.1736,
+ "step": 2290
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.0001651455657237462,
+ "loss": 1.0431,
+ "step": 2291
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.00016511691342881453,
+ "loss": 1.2796,
+ "step": 2292
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650882518496314,
+ "loss": 1.0578,
+ "step": 2293
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016505958099028334,
+ "loss": 1.3914,
+ "step": 2294
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650309008548582,
+ "loss": 1.0046,
+ "step": 2295
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001650022114474451,
+ "loss": 1.0246,
+ "step": 2296
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016497351277213458,
+ "loss": 1.2789,
+ "step": 2297
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016494480483301836,
+ "loss": 1.0036,
+ "step": 2298
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.00016491608763418968,
+ "loss": 0.886,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "eval_loss": 2.3017475605010986,
+ "eval_runtime": 283.8846,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2299
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648873611797429,
+ "loss": 1.3953,
+ "step": 2300
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 0.0001648586254737738,
+ "loss": 0.6972,
+ "step": 2301
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016482988052037947,
+ "loss": 1.2311,
+ "step": 2302
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016480112632365833,
+ "loss": 1.327,
+ "step": 2303
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647723628877101,
+ "loss": 0.9534,
+ "step": 2304
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647435902166358,
+ "loss": 0.9164,
+ "step": 2305
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0001647148083145378,
+ "loss": 1.1038,
+ "step": 2306
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016468601718551976,
+ "loss": 1.0444,
+ "step": 2307
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016465721683368666,
+ "loss": 1.2635,
+ "step": 2308
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.00016462840726314486,
+ "loss": 1.1647,
+ "step": 2309
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016459958847800187,
+ "loss": 1.3617,
+ "step": 2310
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016457076048236675,
+ "loss": 1.2355,
+ "step": 2311
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016454192328034962,
+ "loss": 0.9989,
+ "step": 2312
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016451307687606213,
+ "loss": 1.1218,
+ "step": 2313
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.00016448422127361706,
+ "loss": 0.8967,
+ "step": 2314
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644553564771287,
+ "loss": 1.159,
+ "step": 2315
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001644264824907124,
+ "loss": 1.5901,
+ "step": 2316
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 0.0001643975993184851,
+ "loss": 0.979,
+ "step": 2317
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016436870696456482,
+ "loss": 0.8561,
+ "step": 2318
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016433980543307107,
+ "loss": 0.9485,
+ "step": 2319
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016431089472812444,
+ "loss": 0.7736,
+ "step": 2320
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016428197485384707,
+ "loss": 1.2546,
+ "step": 2321
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016425304581436226,
+ "loss": 0.9534,
+ "step": 2322
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001642241076137947,
+ "loss": 0.8182,
+ "step": 2323
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641951602562703,
+ "loss": 1.1107,
+ "step": 2324
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0001641662037459164,
+ "loss": 1.0628,
+ "step": 2325
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.00016413723808686147,
+ "loss": 1.6261,
+ "step": 2326
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001641082632832354,
+ "loss": 1.0286,
+ "step": 2327
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001640792793391694,
+ "loss": 0.5732,
+ "step": 2328
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016405028625879594,
+ "loss": 1.0932,
+ "step": 2329
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016402128404624882,
+ "loss": 1.2585,
+ "step": 2330
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016399227270566308,
+ "loss": 0.8788,
+ "step": 2331
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639632522411751,
+ "loss": 1.1397,
+ "step": 2332
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00016393422265692262,
+ "loss": 1.3517,
+ "step": 2333
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0001639051839570446,
+ "loss": 1.1346,
+ "step": 2334
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016387613614568126,
+ "loss": 0.9594,
+ "step": 2335
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001638470792269743,
+ "loss": 1.0674,
+ "step": 2336
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016381801320506653,
+ "loss": 0.9123,
+ "step": 2337
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016378893808410215,
+ "loss": 1.1909,
+ "step": 2338
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016375985386822664,
+ "loss": 1.0474,
+ "step": 2339
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016373076056158675,
+ "loss": 0.8844,
+ "step": 2340
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.0001637016581683306,
+ "loss": 1.1606,
+ "step": 2341
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 0.00016367254669260749,
+ "loss": 0.6206,
+ "step": 2342
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016364342613856816,
+ "loss": 0.7225,
+ "step": 2343
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016361429651036446,
+ "loss": 1.1782,
+ "step": 2344
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016358515781214977,
+ "loss": 1.0911,
+ "step": 2345
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016355601004807856,
+ "loss": 1.2727,
+ "step": 2346
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016352685322230663,
+ "loss": 0.8294,
+ "step": 2347
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016349768733899117,
+ "loss": 1.1661,
+ "step": 2348
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016346851240229057,
+ "loss": 0.8267,
+ "step": 2349
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016343932841636456,
+ "loss": 1.2873,
+ "step": 2350
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00016341013538537412,
+ "loss": 1.2459,
+ "step": 2351
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016338093331348156,
+ "loss": 0.8939,
+ "step": 2352
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016335172220485042,
+ "loss": 1.024,
+ "step": 2353
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.0001633225020636456,
+ "loss": 0.9981,
+ "step": 2354
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016329327289403325,
+ "loss": 1.331,
+ "step": 2355
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016326403470018084,
+ "loss": 0.7446,
+ "step": 2356
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016323478748625703,
+ "loss": 1.1931,
+ "step": 2357
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016320553125643187,
+ "loss": 1.1287,
+ "step": 2358
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.00016317626601487667,
+ "loss": 1.109,
+ "step": 2359
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016314699176576402,
+ "loss": 0.9946,
+ "step": 2360
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016311770851326778,
+ "loss": 0.8347,
+ "step": 2361
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016308841626156307,
+ "loss": 0.9214,
+ "step": 2362
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.0001630591150148264,
+ "loss": 0.5907,
+ "step": 2363
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016302980477723539,
+ "loss": 1.2412,
+ "step": 2364
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016300048555296915,
+ "loss": 1.2908,
+ "step": 2365
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016297115734620788,
+ "loss": 1.2345,
+ "step": 2366
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.00016294182016113315,
+ "loss": 1.0418,
+ "step": 2367
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016291247400192785,
+ "loss": 1.1457,
+ "step": 2368
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016288311887277608,
+ "loss": 1.2529,
+ "step": 2369
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016285375477786322,
+ "loss": 1.0013,
+ "step": 2370
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016282438172137597,
+ "loss": 0.943,
+ "step": 2371
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016279499970750226,
+ "loss": 0.7009,
+ "step": 2372
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016276560874043137,
+ "loss": 0.9408,
+ "step": 2373
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627362088243538,
+ "loss": 1.1788,
+ "step": 2374
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0001627067999634613,
+ "loss": 0.8106,
+ "step": 2375
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.00016267738216194696,
+ "loss": 1.1695,
+ "step": 2376
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001626479554240051,
+ "loss": 0.9209,
+ "step": 2377
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016261851975383137,
+ "loss": 0.9911,
+ "step": 2378
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016258907515562262,
+ "loss": 1.3819,
+ "step": 2379
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162559621633577,
+ "loss": 0.8926,
+ "step": 2380
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.000162530159191894,
+ "loss": 1.0896,
+ "step": 2381
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016250068783477424,
+ "loss": 0.8403,
+ "step": 2382
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00016247120756641972,
+ "loss": 0.7976,
+ "step": 2383
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.0001624417183910337,
+ "loss": 0.8881,
+ "step": 2384
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001624122203128207,
+ "loss": 0.8302,
+ "step": 2385
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623827133359865,
+ "loss": 1.3312,
+ "step": 2386
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623531974647381,
+ "loss": 1.003,
+ "step": 2387
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0001623236727032839,
+ "loss": 0.9487,
+ "step": 2388
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016229413905583342,
+ "loss": 1.2259,
+ "step": 2389
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016226459652659753,
+ "loss": 0.9327,
+ "step": 2390
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016223504511978838,
+ "loss": 0.7336,
+ "step": 2391
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.00016220548483961934,
+ "loss": 1.0454,
+ "step": 2392
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016217591569030505,
+ "loss": 1.3371,
+ "step": 2393
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016214633767606143,
+ "loss": 1.0814,
+ "step": 2394
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016211675080110566,
+ "loss": 1.2274,
+ "step": 2395
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001620871550696562,
+ "loss": 0.9775,
+ "step": 2396
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016205755048593273,
+ "loss": 1.0323,
+ "step": 2397
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016202793705415622,
+ "loss": 1.5101,
+ "step": 2398
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016199831477854893,
+ "loss": 0.8118,
+ "step": 2399
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0001619686836633343,
+ "loss": 1.0233,
+ "step": 2400
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.00016193904371273715,
+ "loss": 0.9038,
+ "step": 2401
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016190939493098344,
+ "loss": 0.875,
+ "step": 2402
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016187973732230038,
+ "loss": 1.3274,
+ "step": 2403
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016185007089091665,
+ "loss": 1.081,
+ "step": 2404
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016182039564106192,
+ "loss": 1.0841,
+ "step": 2405
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00016179071157696728,
+ "loss": 1.3208,
+ "step": 2406
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.000161761018702865,
+ "loss": 1.1854,
+ "step": 2407
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617313170229887,
+ "loss": 1.0651,
+ "step": 2408
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.0001617016065415731,
+ "loss": 1.1398,
+ "step": 2409
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016167188726285434,
+ "loss": 1.2778,
+ "step": 2410
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016164215919106968,
+ "loss": 1.6758,
+ "step": 2411
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.0001616124223304577,
+ "loss": 0.8341,
+ "step": 2412
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016158267668525832,
+ "loss": 0.9513,
+ "step": 2413
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016155292225971253,
+ "loss": 0.9617,
+ "step": 2414
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016152315905806268,
+ "loss": 0.8664,
+ "step": 2415
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016149338708455237,
+ "loss": 1.331,
+ "step": 2416
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.00016146360634342643,
+ "loss": 1.4212,
+ "step": 2417
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016143381683893094,
+ "loss": 1.2126,
+ "step": 2418
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016140401857531322,
+ "loss": 0.934,
+ "step": 2419
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016137421155682183,
+ "loss": 1.2417,
+ "step": 2420
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001613443957877067,
+ "loss": 1.637,
+ "step": 2421
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016131457127221881,
+ "loss": 1.1456,
+ "step": 2422
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.00016128473801461053,
+ "loss": 0.9402,
+ "step": 2423
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612548960191354,
+ "loss": 1.3797,
+ "step": 2424
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001612250452900483,
+ "loss": 0.8191,
+ "step": 2425
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0001611951858316052,
+ "loss": 1.1725,
+ "step": 2426
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016116531764806346,
+ "loss": 1.5701,
+ "step": 2427
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016113544074368164,
+ "loss": 1.0591,
+ "step": 2428
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016110555512271953,
+ "loss": 1.03,
+ "step": 2429
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.0001610756607894382,
+ "loss": 1.1829,
+ "step": 2430
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016104575774809985,
+ "loss": 1.2222,
+ "step": 2431
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016101584600296804,
+ "loss": 1.1537,
+ "step": 2432
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016098592555830753,
+ "loss": 1.0973,
+ "step": 2433
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.00016095599641838436,
+ "loss": 1.0793,
+ "step": 2434
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016092605858746573,
+ "loss": 1.3484,
+ "step": 2435
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608961120698201,
+ "loss": 1.1689,
+ "step": 2436
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016086615686971726,
+ "loss": 1.0864,
+ "step": 2437
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016083619299142813,
+ "loss": 1.2451,
+ "step": 2438
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0001608062204392249,
+ "loss": 0.9593,
+ "step": 2439
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016077623921738102,
+ "loss": 0.9816,
+ "step": 2440
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016074624933017112,
+ "loss": 1.0845,
+ "step": 2441
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.00016071625078187114,
+ "loss": 0.9875,
+ "step": 2442
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001606862435767582,
+ "loss": 0.8758,
+ "step": 2443
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016065622771911067,
+ "loss": 0.9499,
+ "step": 2444
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016062620321320823,
+ "loss": 1.1133,
+ "step": 2445
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605961700633316,
+ "loss": 0.7228,
+ "step": 2446
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016056612827376293,
+ "loss": 1.2297,
+ "step": 2447
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001605360778487855,
+ "loss": 1.0251,
+ "step": 2448
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016050601879268386,
+ "loss": 0.8097,
+ "step": 2449
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.00016047595110974376,
+ "loss": 0.9872,
+ "step": 2450
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0001604458748042522,
+ "loss": 1.1119,
+ "step": 2451
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.0001604157898804974,
+ "loss": 0.8256,
+ "step": 2452
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016038569634276882,
+ "loss": 0.9036,
+ "step": 2453
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016035559419535716,
+ "loss": 1.1173,
+ "step": 2454
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016032548344255428,
+ "loss": 1.3173,
+ "step": 2455
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016029536408865337,
+ "loss": 0.717,
+ "step": 2456
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016026523613794878,
+ "loss": 0.9806,
+ "step": 2457
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016023509959473605,
+ "loss": 1.1509,
+ "step": 2458
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.00016020495446331207,
+ "loss": 1.0454,
+ "step": 2459
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601748007479748,
+ "loss": 1.183,
+ "step": 2460
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0001601446384530236,
+ "loss": 1.2611,
+ "step": 2461
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016011446758275888,
+ "loss": 1.0377,
+ "step": 2462
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016008428814148236,
+ "loss": 1.2111,
+ "step": 2463
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016005410013349698,
+ "loss": 1.0952,
+ "step": 2464
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00016002390356310685,
+ "loss": 0.7589,
+ "step": 2465
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015999369843461742,
+ "loss": 0.8543,
+ "step": 2466
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.00015996348475233525,
+ "loss": 1.1509,
+ "step": 2467
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001599332625205681,
+ "loss": 1.287,
+ "step": 2468
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015990303174362512,
+ "loss": 1.0401,
+ "step": 2469
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001598727924258164,
+ "loss": 1.0247,
+ "step": 2470
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015984254457145354,
+ "loss": 1.1537,
+ "step": 2471
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015981228818484917,
+ "loss": 0.9606,
+ "step": 2472
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001597820232703172,
+ "loss": 0.8709,
+ "step": 2473
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015975174983217275,
+ "loss": 1.2827,
+ "step": 2474
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00015972146787473213,
+ "loss": 0.8057,
+ "step": 2475
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0001596911774023129,
+ "loss": 1.0857,
+ "step": 2476
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015966087841923386,
+ "loss": 1.1731,
+ "step": 2477
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001596305709298149,
+ "loss": 0.8871,
+ "step": 2478
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015960025493837727,
+ "loss": 1.0671,
+ "step": 2479
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015956993044924334,
+ "loss": 1.3735,
+ "step": 2480
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015953959746673675,
+ "loss": 1.4655,
+ "step": 2481
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015950925599518228,
+ "loss": 1.3975,
+ "step": 2482
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00015947890603890602,
+ "loss": 0.9468,
+ "step": 2483
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0001594485476022352,
+ "loss": 0.9976,
+ "step": 2484
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015941818068949818,
+ "loss": 0.6732,
+ "step": 2485
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015938780530502474,
+ "loss": 0.9848,
+ "step": 2486
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015935742145314568,
+ "loss": 1.2441,
+ "step": 2487
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001593270291381931,
+ "loss": 0.9631,
+ "step": 2488
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015929662836450029,
+ "loss": 0.8868,
+ "step": 2489
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592662191364017,
+ "loss": 0.9063,
+ "step": 2490
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00015923580145823303,
+ "loss": 0.6886,
+ "step": 2491
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.0001592053753343312,
+ "loss": 1.0702,
+ "step": 2492
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591749407690343,
+ "loss": 1.3879,
+ "step": 2493
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015914449776668167,
+ "loss": 1.1048,
+ "step": 2494
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001591140463316137,
+ "loss": 0.9921,
+ "step": 2495
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015908358646817225,
+ "loss": 1.3042,
+ "step": 2496
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015905311818070015,
+ "loss": 0.8413,
+ "step": 2497
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00015902264147354153,
+ "loss": 1.5201,
+ "step": 2498
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589921563510417,
+ "loss": 1.0727,
+ "step": 2499
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589616628175472,
+ "loss": 1.0439,
+ "step": 2500
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0001589311608774057,
+ "loss": 1.2308,
+ "step": 2501
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015890065053496613,
+ "loss": 1.1155,
+ "step": 2502
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015887013179457862,
+ "loss": 1.3345,
+ "step": 2503
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015883960466059444,
+ "loss": 0.9551,
+ "step": 2504
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001588090691373661,
+ "loss": 1.0713,
+ "step": 2505
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.00015877852522924732,
+ "loss": 1.299,
+ "step": 2506
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.000158747972940593,
+ "loss": 0.8535,
+ "step": 2507
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001587174122757592,
+ "loss": 0.9924,
+ "step": 2508
+ },
+ {
+ "epoch": 2.98,
+ "eval_loss": 2.328662395477295,
+ "eval_runtime": 283.7765,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2508
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 0.0001586868432391032,
+ "loss": 1.0512,
+ "step": 2509
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015865626583498355,
+ "loss": 1.2775,
+ "step": 2510
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015862568006775983,
+ "loss": 0.7054,
+ "step": 2511
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015859508594179294,
+ "loss": 0.8524,
+ "step": 2512
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015856448346144496,
+ "loss": 0.9871,
+ "step": 2513
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015853387263107909,
+ "loss": 0.8642,
+ "step": 2514
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015850325345505975,
+ "loss": 1.1789,
+ "step": 2515
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015847262593775266,
+ "loss": 1.2765,
+ "step": 2516
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.00015844199008352458,
+ "loss": 0.6272,
+ "step": 2517
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015841134589674352,
+ "loss": 1.3037,
+ "step": 2518
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015838069338177863,
+ "loss": 1.054,
+ "step": 2519
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015835003254300039,
+ "loss": 1.1942,
+ "step": 2520
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015831936338478025,
+ "loss": 0.8866,
+ "step": 2521
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015828868591149104,
+ "loss": 1.1444,
+ "step": 2522
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015825800012750666,
+ "loss": 0.8597,
+ "step": 2523
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001582273060372023,
+ "loss": 0.7731,
+ "step": 2524
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015819660364495416,
+ "loss": 1.1953,
+ "step": 2525
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001581658929551398,
+ "loss": 1.3946,
+ "step": 2526
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015813517397213791,
+ "loss": 1.0173,
+ "step": 2527
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015810444670032831,
+ "loss": 1.1762,
+ "step": 2528
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015807371114409202,
+ "loss": 0.7283,
+ "step": 2529
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015804296730781135,
+ "loss": 1.1515,
+ "step": 2530
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015801221519586958,
+ "loss": 0.9389,
+ "step": 2531
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001579814548126514,
+ "loss": 1.1869,
+ "step": 2532
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015795068616254247,
+ "loss": 1.2957,
+ "step": 2533
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015791990924992981,
+ "loss": 1.0514,
+ "step": 2534
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015788912407920148,
+ "loss": 0.6762,
+ "step": 2535
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015785833065474683,
+ "loss": 0.4121,
+ "step": 2536
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00015782752898095627,
+ "loss": 0.4532,
+ "step": 2537
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001577967190622215,
+ "loss": 0.4847,
+ "step": 2538
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0001577659009029353,
+ "loss": 0.8313,
+ "step": 2539
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015773507450749172,
+ "loss": 0.5304,
+ "step": 2540
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015770423988028588,
+ "loss": 0.6003,
+ "step": 2541
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015767339702571414,
+ "loss": 0.3988,
+ "step": 2542
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015764254594817398,
+ "loss": 0.6133,
+ "step": 2543
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001576116866520642,
+ "loss": 0.4858,
+ "step": 2544
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.00015758081914178456,
+ "loss": 0.3691,
+ "step": 2545
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001575499434217361,
+ "loss": 0.5441,
+ "step": 2546
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 0.0001575190594963211,
+ "loss": 0.4605,
+ "step": 2547
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015748816736994284,
+ "loss": 0.3681,
+ "step": 2548
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015745726704700593,
+ "loss": 0.4113,
+ "step": 2549
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015742635853191608,
+ "loss": 0.5233,
+ "step": 2550
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015739544182908014,
+ "loss": 0.356,
+ "step": 2551
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015736451694290616,
+ "loss": 0.4105,
+ "step": 2552
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015733358387780337,
+ "loss": 0.4451,
+ "step": 2553
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.00015730264263818212,
+ "loss": 0.5023,
+ "step": 2554
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.000157271693228454,
+ "loss": 0.3671,
+ "step": 2555
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001572407356530317,
+ "loss": 0.7077,
+ "step": 2556
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015720976991632913,
+ "loss": 0.4439,
+ "step": 2557
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015717879602276122,
+ "loss": 0.5961,
+ "step": 2558
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001571478139767443,
+ "loss": 0.4269,
+ "step": 2559
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015711682378269565,
+ "loss": 0.3427,
+ "step": 2560
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015708582544503386,
+ "loss": 0.5736,
+ "step": 2561
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00015705481896817854,
+ "loss": 0.3707,
+ "step": 2562
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0001570238043565506,
+ "loss": 0.4076,
+ "step": 2563
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.000156992781614572,
+ "loss": 0.6514,
+ "step": 2564
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015696175074666598,
+ "loss": 0.4012,
+ "step": 2565
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.0001569307117572568,
+ "loss": 0.3492,
+ "step": 2566
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015689966465076992,
+ "loss": 0.4121,
+ "step": 2567
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015686860943163206,
+ "loss": 0.5769,
+ "step": 2568
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015683754610427094,
+ "loss": 0.4872,
+ "step": 2569
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015680647467311557,
+ "loss": 0.5518,
+ "step": 2570
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015677539514259608,
+ "loss": 0.411,
+ "step": 2571
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 0.00015674430751714361,
+ "loss": 0.3443,
+ "step": 2572
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015671321180119074,
+ "loss": 0.3706,
+ "step": 2573
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0001566821079991709,
+ "loss": 0.6168,
+ "step": 2574
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0001566509961155189,
+ "loss": 0.3726,
+ "step": 2575
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015661987615467058,
+ "loss": 0.3976,
+ "step": 2576
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015658874812106297,
+ "loss": 0.3697,
+ "step": 2577
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015655761201913425,
+ "loss": 0.2759,
+ "step": 2578
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00015652646785332378,
+ "loss": 0.3572,
+ "step": 2579
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.000156495315628072,
+ "loss": 0.5333,
+ "step": 2580
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015646415534782056,
+ "loss": 0.4004,
+ "step": 2581
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.0001564329870170122,
+ "loss": 0.4736,
+ "step": 2582
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015640181064009088,
+ "loss": 0.4814,
+ "step": 2583
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015637062622150168,
+ "loss": 0.3351,
+ "step": 2584
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015633943376569081,
+ "loss": 0.4497,
+ "step": 2585
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015630823327710558,
+ "loss": 0.4202,
+ "step": 2586
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015627702476019457,
+ "loss": 0.5934,
+ "step": 2587
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.0001562458082194074,
+ "loss": 0.4664,
+ "step": 2588
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00015621458365919487,
+ "loss": 0.4077,
+ "step": 2589
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015618335108400893,
+ "loss": 0.5244,
+ "step": 2590
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015615211049830268,
+ "loss": 0.5042,
+ "step": 2591
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015612086190653027,
+ "loss": 0.3442,
+ "step": 2592
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015608960531314717,
+ "loss": 0.6337,
+ "step": 2593
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015605834072260984,
+ "loss": 0.3542,
+ "step": 2594
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001560270681393759,
+ "loss": 0.5113,
+ "step": 2595
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001559957875679042,
+ "loss": 0.4346,
+ "step": 2596
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.00015596449901265463,
+ "loss": 0.5231,
+ "step": 2597
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015593320247808822,
+ "loss": 0.5193,
+ "step": 2598
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001559018979686673,
+ "loss": 0.3575,
+ "step": 2599
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015587058548885505,
+ "loss": 0.6356,
+ "step": 2600
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015583926504311605,
+ "loss": 0.3313,
+ "step": 2601
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015580793663591585,
+ "loss": 0.356,
+ "step": 2602
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.00015577660027172127,
+ "loss": 0.5498,
+ "step": 2603
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001557452559550001,
+ "loss": 0.3973,
+ "step": 2604
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0001557139036902215,
+ "loss": 0.4751,
+ "step": 2605
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015568254348185544,
+ "loss": 0.4297,
+ "step": 2606
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015565117533437335,
+ "loss": 0.4299,
+ "step": 2607
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015561979925224754,
+ "loss": 0.4651,
+ "step": 2608
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015558841523995162,
+ "loss": 0.474,
+ "step": 2609
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015555702330196023,
+ "loss": 0.4143,
+ "step": 2610
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.0001555256234427492,
+ "loss": 0.393,
+ "step": 2611
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015549421566679546,
+ "loss": 0.3738,
+ "step": 2612
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00015546279997857704,
+ "loss": 0.4394,
+ "step": 2613
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.0001554313763825732,
+ "loss": 0.3702,
+ "step": 2614
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015539994488326418,
+ "loss": 0.4594,
+ "step": 2615
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015536850548513147,
+ "loss": 0.3249,
+ "step": 2616
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015533705819265764,
+ "loss": 0.3857,
+ "step": 2617
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0001553056030103264,
+ "loss": 0.3272,
+ "step": 2618
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015527413994262257,
+ "loss": 0.5204,
+ "step": 2619
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00015524266899403206,
+ "loss": 0.3653,
+ "step": 2620
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.000155211190169042,
+ "loss": 0.4698,
+ "step": 2621
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0001551797034721405,
+ "loss": 0.5949,
+ "step": 2622
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015514820890781693,
+ "loss": 0.4074,
+ "step": 2623
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015511670648056178,
+ "loss": 0.3586,
+ "step": 2624
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.0001550851961948665,
+ "loss": 0.6494,
+ "step": 2625
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015505367805522383,
+ "loss": 0.4914,
+ "step": 2626
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.0001550221520661276,
+ "loss": 0.4594,
+ "step": 2627
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015499061823207266,
+ "loss": 0.4102,
+ "step": 2628
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.00015495907655755506,
+ "loss": 0.4229,
+ "step": 2629
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 0.000154927527047072,
+ "loss": 0.7218,
+ "step": 2630
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001548959697051217,
+ "loss": 0.6929,
+ "step": 2631
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015486440453620358,
+ "loss": 0.3628,
+ "step": 2632
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015483283154481815,
+ "loss": 0.4433,
+ "step": 2633
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015480125073546704,
+ "loss": 0.3912,
+ "step": 2634
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001547696621126529,
+ "loss": 0.3682,
+ "step": 2635
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015473806568087968,
+ "loss": 0.354,
+ "step": 2636
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001547064614446523,
+ "loss": 0.4789,
+ "step": 2637
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0001546748494084768,
+ "loss": 0.382,
+ "step": 2638
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00015464322957686041,
+ "loss": 0.4954,
+ "step": 2639
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015461160195431148,
+ "loss": 0.3273,
+ "step": 2640
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001545799665453393,
+ "loss": 0.3414,
+ "step": 2641
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015454832335445447,
+ "loss": 0.5479,
+ "step": 2642
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001545166723861686,
+ "loss": 0.4963,
+ "step": 2643
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015448501364499445,
+ "loss": 0.5547,
+ "step": 2644
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0001544533471354458,
+ "loss": 0.4637,
+ "step": 2645
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015442167286203767,
+ "loss": 0.4248,
+ "step": 2646
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.00015438999082928608,
+ "loss": 0.4213,
+ "step": 2647
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015435830104170822,
+ "loss": 0.3734,
+ "step": 2648
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015432660350382234,
+ "loss": 0.4627,
+ "step": 2649
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.0001542948982201479,
+ "loss": 0.3422,
+ "step": 2650
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015426318519520525,
+ "loss": 0.4409,
+ "step": 2651
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015423146443351607,
+ "loss": 0.3717,
+ "step": 2652
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015419973593960298,
+ "loss": 0.4349,
+ "step": 2653
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00015416799971798985,
+ "loss": 0.5349,
+ "step": 2654
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.0001541362557732015,
+ "loss": 0.4511,
+ "step": 2655
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.000154104504109764,
+ "loss": 0.5997,
+ "step": 2656
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015407274473220434,
+ "loss": 0.661,
+ "step": 2657
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015404097764505083,
+ "loss": 0.3456,
+ "step": 2658
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015400920285283268,
+ "loss": 0.3416,
+ "step": 2659
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015397742036008034,
+ "loss": 0.4707,
+ "step": 2660
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015394563017132526,
+ "loss": 0.3221,
+ "step": 2661
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015391383229110007,
+ "loss": 0.6108,
+ "step": 2662
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015388202672393834,
+ "loss": 0.5504,
+ "step": 2663
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.00015385021347437498,
+ "loss": 0.3973,
+ "step": 2664
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015381839254694583,
+ "loss": 0.5149,
+ "step": 2665
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015378656394618787,
+ "loss": 0.5853,
+ "step": 2666
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0001537547276766391,
+ "loss": 0.517,
+ "step": 2667
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015372288374283875,
+ "loss": 0.5485,
+ "step": 2668
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015369103214932703,
+ "loss": 0.4907,
+ "step": 2669
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0001536591729006453,
+ "loss": 0.3169,
+ "step": 2670
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015362730600133596,
+ "loss": 0.5431,
+ "step": 2671
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00015359543145594258,
+ "loss": 0.2586,
+ "step": 2672
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015356354926900979,
+ "loss": 0.5251,
+ "step": 2673
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015353165944508325,
+ "loss": 0.4104,
+ "step": 2674
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015349976198870973,
+ "loss": 0.4825,
+ "step": 2675
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015346785690443718,
+ "loss": 0.5274,
+ "step": 2676
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.0001534359441968145,
+ "loss": 0.3878,
+ "step": 2677
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.0001534040238703918,
+ "loss": 0.5132,
+ "step": 2678
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015337209592972023,
+ "loss": 0.5145,
+ "step": 2679
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00015334016037935196,
+ "loss": 0.5548,
+ "step": 2680
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015330821722384037,
+ "loss": 0.7494,
+ "step": 2681
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015327626646773976,
+ "loss": 0.5569,
+ "step": 2682
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015324430811560573,
+ "loss": 0.2622,
+ "step": 2683
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001532123421719948,
+ "loss": 0.3749,
+ "step": 2684
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015318036864146457,
+ "loss": 0.3959,
+ "step": 2685
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001531483875285738,
+ "loss": 0.5243,
+ "step": 2686
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0001531163988378823,
+ "loss": 0.3115,
+ "step": 2687
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015308440257395093,
+ "loss": 0.2385,
+ "step": 2688
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.00015305239874134174,
+ "loss": 0.4431,
+ "step": 2689
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.0001530203873446177,
+ "loss": 0.378,
+ "step": 2690
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015298836838834298,
+ "loss": 0.4521,
+ "step": 2691
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015295634187708279,
+ "loss": 0.6309,
+ "step": 2692
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015292430781540335,
+ "loss": 0.4355,
+ "step": 2693
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015289226620787208,
+ "loss": 0.4537,
+ "step": 2694
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.0001528602170590574,
+ "loss": 0.4305,
+ "step": 2695
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015282816037352878,
+ "loss": 0.5355,
+ "step": 2696
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00015279609615585687,
+ "loss": 0.5243,
+ "step": 2697
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001527640244106133,
+ "loss": 0.5334,
+ "step": 2698
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015273194514237078,
+ "loss": 0.5409,
+ "step": 2699
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001526998583557031,
+ "loss": 0.4042,
+ "step": 2700
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015266776405518524,
+ "loss": 0.5536,
+ "step": 2701
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.000152635662245393,
+ "loss": 0.2743,
+ "step": 2702
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015260355293090353,
+ "loss": 0.4762,
+ "step": 2703
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.00015257143611629482,
+ "loss": 0.4552,
+ "step": 2704
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0001525393118061461,
+ "loss": 0.5395,
+ "step": 2705
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001525071800050375,
+ "loss": 0.4297,
+ "step": 2706
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015247504071755046,
+ "loss": 0.364,
+ "step": 2707
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015244289394826722,
+ "loss": 0.9499,
+ "step": 2708
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015241073970177126,
+ "loss": 0.579,
+ "step": 2709
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.000152378577982647,
+ "loss": 0.3111,
+ "step": 2710
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001523464087954801,
+ "loss": 0.3345,
+ "step": 2711
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015231423214485715,
+ "loss": 0.4628,
+ "step": 2712
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00015228204803536586,
+ "loss": 0.4803,
+ "step": 2713
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.0001522498564715949,
+ "loss": 0.4164,
+ "step": 2714
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015221765745813417,
+ "loss": 0.6468,
+ "step": 2715
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015218545099957449,
+ "loss": 0.4495,
+ "step": 2716
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015215323710050785,
+ "loss": 0.4184,
+ "step": 2717
+ },
+ {
+ "epoch": 3.22,
+ "eval_loss": 2.9206559658050537,
+ "eval_runtime": 283.9002,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 2717
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015212101576552722,
+ "loss": 0.4215,
+ "step": 2718
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015208878699922668,
+ "loss": 1.4488,
+ "step": 2719
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.0001520565508062013,
+ "loss": 0.4449,
+ "step": 2720
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.0001520243071910473,
+ "loss": 0.2853,
+ "step": 2721
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00015199205615836191,
+ "loss": 0.4572,
+ "step": 2722
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015195979771274342,
+ "loss": 0.4436,
+ "step": 2723
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.0001519275318587912,
+ "loss": 0.38,
+ "step": 2724
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015189525860110563,
+ "loss": 0.4956,
+ "step": 2725
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015186297794428816,
+ "loss": 0.8514,
+ "step": 2726
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015183068989294132,
+ "loss": 0.4518,
+ "step": 2727
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015179839445166873,
+ "loss": 0.5581,
+ "step": 2728
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015176609162507497,
+ "loss": 0.5828,
+ "step": 2729
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.00015173378141776568,
+ "loss": 0.4109,
+ "step": 2730
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015170146383434768,
+ "loss": 0.5762,
+ "step": 2731
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015166913887942868,
+ "loss": 0.4502,
+ "step": 2732
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015163680655761757,
+ "loss": 0.3736,
+ "step": 2733
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015160446687352417,
+ "loss": 0.3771,
+ "step": 2734
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015157211983175947,
+ "loss": 0.469,
+ "step": 2735
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015153976543693542,
+ "loss": 0.665,
+ "step": 2736
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00015150740369366508,
+ "loss": 0.3495,
+ "step": 2737
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.0001514750346065625,
+ "loss": 0.4513,
+ "step": 2738
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.0001514426581802428,
+ "loss": 0.4571,
+ "step": 2739
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015141027441932216,
+ "loss": 0.4197,
+ "step": 2740
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015137788332841785,
+ "loss": 0.3396,
+ "step": 2741
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015134548491214806,
+ "loss": 0.3547,
+ "step": 2742
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015131307917513214,
+ "loss": 0.3073,
+ "step": 2743
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015128066612199044,
+ "loss": 0.7091,
+ "step": 2744
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015124824575734428,
+ "loss": 0.2845,
+ "step": 2745
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015121581808581622,
+ "loss": 0.2903,
+ "step": 2746
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.00015118338311202964,
+ "loss": 0.4065,
+ "step": 2747
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015115094084060916,
+ "loss": 0.6152,
+ "step": 2748
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015111849127618022,
+ "loss": 0.5352,
+ "step": 2749
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.0001510860344233695,
+ "loss": 0.414,
+ "step": 2750
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015105357028680457,
+ "loss": 0.4756,
+ "step": 2751
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015102109887111422,
+ "loss": 0.4644,
+ "step": 2752
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015098862018092808,
+ "loss": 0.4231,
+ "step": 2753
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00015095613422087692,
+ "loss": 0.4617,
+ "step": 2754
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.0001509236409955925,
+ "loss": 0.5876,
+ "step": 2755
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001508911405097077,
+ "loss": 0.5696,
+ "step": 2756
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015085863276785637,
+ "loss": 0.3826,
+ "step": 2757
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001508261177746734,
+ "loss": 0.4338,
+ "step": 2758
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001507935955347947,
+ "loss": 0.3546,
+ "step": 2759
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015076106605285724,
+ "loss": 0.413,
+ "step": 2760
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.000150728529333499,
+ "loss": 0.3954,
+ "step": 2761
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015069598538135906,
+ "loss": 0.5214,
+ "step": 2762
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.0001506634342010774,
+ "loss": 0.5239,
+ "step": 2763
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 0.00015063087579729519,
+ "loss": 0.8681,
+ "step": 2764
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015059831017465449,
+ "loss": 0.4616,
+ "step": 2765
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015056573733779848,
+ "loss": 0.4721,
+ "step": 2766
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015053315729137128,
+ "loss": 0.4449,
+ "step": 2767
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.0001505005700400182,
+ "loss": 0.569,
+ "step": 2768
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015046797558838535,
+ "loss": 0.4926,
+ "step": 2769
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015043537394112007,
+ "loss": 0.462,
+ "step": 2770
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015040276510287063,
+ "loss": 0.6983,
+ "step": 2771
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00015037014907828632,
+ "loss": 0.4644,
+ "step": 2772
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001503375258720175,
+ "loss": 0.5924,
+ "step": 2773
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015030489548871544,
+ "loss": 0.5282,
+ "step": 2774
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015027225793303264,
+ "loss": 0.4757,
+ "step": 2775
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015023961320962247,
+ "loss": 0.5014,
+ "step": 2776
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001502069613231393,
+ "loss": 0.3455,
+ "step": 2777
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015017430227823864,
+ "loss": 0.4525,
+ "step": 2778
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001501416360795769,
+ "loss": 0.51,
+ "step": 2779
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.00015010896273181165,
+ "loss": 0.3766,
+ "step": 2780
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001500762822396013,
+ "loss": 0.3162,
+ "step": 2781
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00015004359460760546,
+ "loss": 0.406,
+ "step": 2782
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00015001089984048463,
+ "loss": 0.4671,
+ "step": 2783
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014997819794290034,
+ "loss": 0.4299,
+ "step": 2784
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014994548891951524,
+ "loss": 0.5494,
+ "step": 2785
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001499127727749929,
+ "loss": 0.351,
+ "step": 2786
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014988004951399785,
+ "loss": 0.3807,
+ "step": 2787
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.00014984731914119586,
+ "loss": 0.3999,
+ "step": 2788
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 0.0001498145816612534,
+ "loss": 0.7609,
+ "step": 2789
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014978183707883827,
+ "loss": 0.4466,
+ "step": 2790
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014974908539861905,
+ "loss": 0.592,
+ "step": 2791
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014971632662526545,
+ "loss": 0.4786,
+ "step": 2792
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014968356076344814,
+ "loss": 0.4087,
+ "step": 2793
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.0001496507878178388,
+ "loss": 0.3811,
+ "step": 2794
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014961800779311014,
+ "loss": 0.4091,
+ "step": 2795
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014958522069393593,
+ "loss": 0.6861,
+ "step": 2796
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.00014955242652499084,
+ "loss": 0.3346,
+ "step": 2797
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014951962529095064,
+ "loss": 0.5417,
+ "step": 2798
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.000149486816996492,
+ "loss": 0.7325,
+ "step": 2799
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014945400164629278,
+ "loss": 0.5007,
+ "step": 2800
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014942117924503164,
+ "loss": 0.4217,
+ "step": 2801
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014938834979738835,
+ "loss": 0.5265,
+ "step": 2802
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014935551330804372,
+ "loss": 0.4376,
+ "step": 2803
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.0001493226697816795,
+ "loss": 0.5068,
+ "step": 2804
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 0.00014928981922297842,
+ "loss": 0.6248,
+ "step": 2805
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001492569616366243,
+ "loss": 0.593,
+ "step": 2806
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001492240970273019,
+ "loss": 0.6713,
+ "step": 2807
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014919122539969697,
+ "loss": 0.5736,
+ "step": 2808
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014915834675849633,
+ "loss": 0.3006,
+ "step": 2809
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014912546110838775,
+ "loss": 0.5175,
+ "step": 2810
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014909256845405998,
+ "loss": 0.52,
+ "step": 2811
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014905966880020282,
+ "loss": 0.5491,
+ "step": 2812
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00014902676215150702,
+ "loss": 0.6007,
+ "step": 2813
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.0001489938485126644,
+ "loss": 0.6552,
+ "step": 2814
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014896092788836763,
+ "loss": 0.3624,
+ "step": 2815
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001489280002833106,
+ "loss": 0.2626,
+ "step": 2816
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014889506570218796,
+ "loss": 0.409,
+ "step": 2817
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014886212414969553,
+ "loss": 0.473,
+ "step": 2818
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014882917563052998,
+ "loss": 0.4205,
+ "step": 2819
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00014879622014938915,
+ "loss": 0.4603,
+ "step": 2820
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001487632577109717,
+ "loss": 0.3522,
+ "step": 2821
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.0001487302883199774,
+ "loss": 0.3787,
+ "step": 2822
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014869731198110695,
+ "loss": 0.6,
+ "step": 2823
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.000148664328699062,
+ "loss": 0.4291,
+ "step": 2824
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014863133847854533,
+ "loss": 0.4358,
+ "step": 2825
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.0001485983413242606,
+ "loss": 0.4144,
+ "step": 2826
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.0001485653372409125,
+ "loss": 0.842,
+ "step": 2827
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014853232623320662,
+ "loss": 0.3398,
+ "step": 2828
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014849930830584972,
+ "loss": 0.5005,
+ "step": 2829
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 0.00014846628346354933,
+ "loss": 0.5777,
+ "step": 2830
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014843325171101413,
+ "loss": 0.3953,
+ "step": 2831
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014840021305295373,
+ "loss": 0.4056,
+ "step": 2832
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014836716749407872,
+ "loss": 0.7682,
+ "step": 2833
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.0001483341150391006,
+ "loss": 0.3208,
+ "step": 2834
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014830105569273204,
+ "loss": 0.4317,
+ "step": 2835
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014826798945968654,
+ "loss": 0.363,
+ "step": 2836
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014823491634467862,
+ "loss": 0.3784,
+ "step": 2837
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00014820183635242374,
+ "loss": 0.9267,
+ "step": 2838
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.0001481687494876385,
+ "loss": 0.4245,
+ "step": 2839
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014813565575504022,
+ "loss": 0.3929,
+ "step": 2840
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014810255515934747,
+ "loss": 0.5171,
+ "step": 2841
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014806944770527958,
+ "loss": 0.5181,
+ "step": 2842
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014803633339755703,
+ "loss": 0.4765,
+ "step": 2843
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014800321224090114,
+ "loss": 0.4433,
+ "step": 2844
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014797008424003428,
+ "loss": 0.461,
+ "step": 2845
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.0001479369493996798,
+ "loss": 0.5688,
+ "step": 2846
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 0.00014790380772456197,
+ "loss": 0.4822,
+ "step": 2847
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.0001478706592194061,
+ "loss": 0.4993,
+ "step": 2848
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014783750388893842,
+ "loss": 0.3967,
+ "step": 2849
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014780434173788617,
+ "loss": 0.4708,
+ "step": 2850
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014777117277097758,
+ "loss": 0.5721,
+ "step": 2851
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014773799699294176,
+ "loss": 0.5276,
+ "step": 2852
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014770481440850891,
+ "loss": 0.4135,
+ "step": 2853
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.0001476716250224101,
+ "loss": 0.716,
+ "step": 2854
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00014763842883937743,
+ "loss": 0.3663,
+ "step": 2855
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014760522586414396,
+ "loss": 0.4105,
+ "step": 2856
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014757201610144372,
+ "loss": 0.4554,
+ "step": 2857
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014753879955601163,
+ "loss": 0.4366,
+ "step": 2858
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.0001475055762325837,
+ "loss": 0.3752,
+ "step": 2859
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014747234613589685,
+ "loss": 0.3747,
+ "step": 2860
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.000147439109270689,
+ "loss": 0.5533,
+ "step": 2861
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014740586564169892,
+ "loss": 0.4962,
+ "step": 2862
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014737261525366648,
+ "loss": 0.5318,
+ "step": 2863
+ },
+ {
+ "epoch": 3.39,
+ "learning_rate": 0.00014733935811133244,
+ "loss": 0.4592,
+ "step": 2864
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014730609421943855,
+ "loss": 0.429,
+ "step": 2865
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014727282358272754,
+ "loss": 0.4163,
+ "step": 2866
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014723954620594304,
+ "loss": 0.4811,
+ "step": 2867
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0001472062620938297,
+ "loss": 0.4662,
+ "step": 2868
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014717297125113311,
+ "loss": 0.531,
+ "step": 2869
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0001471396736825998,
+ "loss": 0.3233,
+ "step": 2870
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00014710636939297724,
+ "loss": 0.4171,
+ "step": 2871
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.000147073058387014,
+ "loss": 0.5412,
+ "step": 2872
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014703974066945943,
+ "loss": 0.4357,
+ "step": 2873
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014700641624506392,
+ "loss": 0.3889,
+ "step": 2874
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001469730851185788,
+ "loss": 0.456,
+ "step": 2875
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014693974729475636,
+ "loss": 0.4365,
+ "step": 2876
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001469064027783499,
+ "loss": 0.3947,
+ "step": 2877
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014687305157411355,
+ "loss": 0.5718,
+ "step": 2878
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.0001468396936868025,
+ "loss": 0.4652,
+ "step": 2879
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00014680632912117286,
+ "loss": 0.4242,
+ "step": 2880
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.0001467729578819817,
+ "loss": 0.5045,
+ "step": 2881
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014673957997398695,
+ "loss": 0.4098,
+ "step": 2882
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014670619540194766,
+ "loss": 0.597,
+ "step": 2883
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014667280417062374,
+ "loss": 0.5208,
+ "step": 2884
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014663940628477598,
+ "loss": 0.4881,
+ "step": 2885
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014660600174916627,
+ "loss": 0.5234,
+ "step": 2886
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.0001465725905685573,
+ "loss": 0.439,
+ "step": 2887
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014653917274771284,
+ "loss": 0.4498,
+ "step": 2888
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 0.00014650574829139747,
+ "loss": 0.4837,
+ "step": 2889
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014647231720437686,
+ "loss": 0.4232,
+ "step": 2890
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014643887949141753,
+ "loss": 0.4467,
+ "step": 2891
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014640543515728695,
+ "loss": 0.3566,
+ "step": 2892
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014637198420675354,
+ "loss": 0.3888,
+ "step": 2893
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014633852664458673,
+ "loss": 0.326,
+ "step": 2894
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.0001463050624755568,
+ "loss": 0.3608,
+ "step": 2895
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014627159170443502,
+ "loss": 0.5326,
+ "step": 2896
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 0.00014623811433599359,
+ "loss": 0.3171,
+ "step": 2897
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014620463037500568,
+ "loss": 0.4619,
+ "step": 2898
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014617113982624526,
+ "loss": 0.7739,
+ "step": 2899
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014613764269448751,
+ "loss": 0.4327,
+ "step": 2900
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.0001461041389845083,
+ "loss": 0.6078,
+ "step": 2901
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014607062870108456,
+ "loss": 0.3863,
+ "step": 2902
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014603711184899408,
+ "loss": 0.4787,
+ "step": 2903
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014600358843301568,
+ "loss": 0.2997,
+ "step": 2904
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 0.00014597005845792905,
+ "loss": 0.3657,
+ "step": 2905
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014593652192851486,
+ "loss": 0.334,
+ "step": 2906
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014590297884955463,
+ "loss": 0.6809,
+ "step": 2907
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.0001458694292258309,
+ "loss": 0.4739,
+ "step": 2908
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014583587306212713,
+ "loss": 0.4139,
+ "step": 2909
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014580231036322768,
+ "loss": 0.3307,
+ "step": 2910
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014576874113391789,
+ "loss": 0.4155,
+ "step": 2911
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014573516537898394,
+ "loss": 0.4461,
+ "step": 2912
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.00014570158310321305,
+ "loss": 0.4775,
+ "step": 2913
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 0.0001456679943113933,
+ "loss": 0.344,
+ "step": 2914
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014563439900831373,
+ "loss": 0.3568,
+ "step": 2915
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014560079719876424,
+ "loss": 0.3808,
+ "step": 2916
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.0001455671888875358,
+ "loss": 0.5467,
+ "step": 2917
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014553357407942022,
+ "loss": 0.5267,
+ "step": 2918
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014549995277921015,
+ "loss": 0.4476,
+ "step": 2919
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014546632499169937,
+ "loss": 0.4463,
+ "step": 2920
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014543269072168235,
+ "loss": 0.5553,
+ "step": 2921
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 0.00014539904997395468,
+ "loss": 0.5476,
+ "step": 2922
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001453654027533128,
+ "loss": 0.4443,
+ "step": 2923
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014533174906455404,
+ "loss": 0.4353,
+ "step": 2924
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014529808891247667,
+ "loss": 0.4479,
+ "step": 2925
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014526442230187995,
+ "loss": 0.3951,
+ "step": 2926
+ },
+ {
+ "epoch": 3.47,
+ "eval_loss": 2.882225751876831,
+ "eval_runtime": 283.9462,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 2926
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014523074923756394,
+ "loss": 0.679,
+ "step": 2927
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001451970697243297,
+ "loss": 0.4178,
+ "step": 2928
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.0001451633837669792,
+ "loss": 0.4121,
+ "step": 2929
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 0.00014512969137031538,
+ "loss": 0.3929,
+ "step": 2930
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014509599253914195,
+ "loss": 0.366,
+ "step": 2931
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.0001450622872782637,
+ "loss": 0.3528,
+ "step": 2932
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014502857559248617,
+ "loss": 0.5003,
+ "step": 2933
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014499485748661604,
+ "loss": 0.4901,
+ "step": 2934
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014496113296546067,
+ "loss": 0.4538,
+ "step": 2935
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014492740203382847,
+ "loss": 0.4549,
+ "step": 2936
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.0001448936646965288,
+ "loss": 0.5464,
+ "step": 2937
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014485992095837177,
+ "loss": 0.43,
+ "step": 2938
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 0.00014482617082416858,
+ "loss": 0.5893,
+ "step": 2939
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001447924142987312,
+ "loss": 0.4947,
+ "step": 2940
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014475865138687262,
+ "loss": 0.4903,
+ "step": 2941
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001447248820934067,
+ "loss": 0.4933,
+ "step": 2942
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014469110642314817,
+ "loss": 0.4516,
+ "step": 2943
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001446573243809127,
+ "loss": 0.469,
+ "step": 2944
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014462353597151684,
+ "loss": 0.6531,
+ "step": 2945
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.00014458974119977818,
+ "loss": 0.2754,
+ "step": 2946
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 0.0001445559400705151,
+ "loss": 0.5676,
+ "step": 2947
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014452213258854684,
+ "loss": 0.5903,
+ "step": 2948
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014448831875869364,
+ "loss": 0.5022,
+ "step": 2949
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.0001444544985857766,
+ "loss": 0.3509,
+ "step": 2950
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014442067207461775,
+ "loss": 0.3921,
+ "step": 2951
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014438683923004005,
+ "loss": 0.4997,
+ "step": 2952
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014435300005686728,
+ "loss": 0.6218,
+ "step": 2953
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014431915455992414,
+ "loss": 0.4097,
+ "step": 2954
+ },
+ {
+ "epoch": 3.5,
+ "learning_rate": 0.00014428530274403632,
+ "loss": 0.3478,
+ "step": 2955
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014425144461403035,
+ "loss": 0.4506,
+ "step": 2956
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014421758017473362,
+ "loss": 0.4025,
+ "step": 2957
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014418370943097448,
+ "loss": 0.3838,
+ "step": 2958
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014414983238758217,
+ "loss": 0.6366,
+ "step": 2959
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014411594904938682,
+ "loss": 0.4649,
+ "step": 2960
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014408205942121943,
+ "loss": 0.3361,
+ "step": 2961
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.00014404816350791188,
+ "loss": 0.3692,
+ "step": 2962
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 0.0001440142613142971,
+ "loss": 0.6162,
+ "step": 2963
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014398035284520874,
+ "loss": 0.5935,
+ "step": 2964
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001439464381054814,
+ "loss": 0.545,
+ "step": 2965
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014391251709995061,
+ "loss": 0.4178,
+ "step": 2966
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014387858983345276,
+ "loss": 0.5552,
+ "step": 2967
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001438446563108251,
+ "loss": 0.4506,
+ "step": 2968
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014381071653690587,
+ "loss": 0.429,
+ "step": 2969
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014377677051653404,
+ "loss": 0.3897,
+ "step": 2970
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.0001437428182545497,
+ "loss": 0.4663,
+ "step": 2971
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 0.00014370885975579364,
+ "loss": 0.4643,
+ "step": 2972
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001436748950251076,
+ "loss": 0.5433,
+ "step": 2973
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001436409240673342,
+ "loss": 0.4967,
+ "step": 2974
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.000143606946887317,
+ "loss": 0.3717,
+ "step": 2975
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014357296348990037,
+ "loss": 0.4166,
+ "step": 2976
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001435389738799296,
+ "loss": 0.455,
+ "step": 2977
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014350497806225087,
+ "loss": 0.4603,
+ "step": 2978
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.00014347097604171127,
+ "loss": 0.4325,
+ "step": 2979
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 0.0001434369678231587,
+ "loss": 0.4375,
+ "step": 2980
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014340295341144202,
+ "loss": 0.4932,
+ "step": 2981
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014336893281141096,
+ "loss": 0.5264,
+ "step": 2982
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014333490602791608,
+ "loss": 0.4677,
+ "step": 2983
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014330087306580887,
+ "loss": 0.6505,
+ "step": 2984
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014326683392994167,
+ "loss": 0.4451,
+ "step": 2985
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.00014323278862516775,
+ "loss": 0.4025,
+ "step": 2986
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.0001431987371563412,
+ "loss": 0.5084,
+ "step": 2987
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.000143164679528317,
+ "loss": 0.4806,
+ "step": 2988
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014313061574595115,
+ "loss": 0.3954,
+ "step": 2989
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014309654581410024,
+ "loss": 0.4339,
+ "step": 2990
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.000143062469737622,
+ "loss": 0.6739,
+ "step": 2991
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014302838752137487,
+ "loss": 0.6414,
+ "step": 2992
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014299429917021827,
+ "loss": 0.5075,
+ "step": 2993
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014296020468901246,
+ "loss": 0.4105,
+ "step": 2994
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014292610408261856,
+ "loss": 0.7371,
+ "step": 2995
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014289199735589852,
+ "loss": 0.7485,
+ "step": 2996
+ },
+ {
+ "epoch": 3.55,
+ "learning_rate": 0.00014285788451371534,
+ "loss": 0.7629,
+ "step": 2997
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014282376556093264,
+ "loss": 0.3849,
+ "step": 2998
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014278964050241512,
+ "loss": 0.5355,
+ "step": 2999
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014275550934302823,
+ "loss": 0.4077,
+ "step": 3000
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014272137208763832,
+ "loss": 0.5352,
+ "step": 3001
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014268722874111265,
+ "loss": 0.5257,
+ "step": 3002
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014265307930831932,
+ "loss": 0.4265,
+ "step": 3003
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.00014261892379412728,
+ "loss": 0.5776,
+ "step": 3004
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 0.0001425847622034063,
+ "loss": 0.3521,
+ "step": 3005
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014255059454102722,
+ "loss": 0.6203,
+ "step": 3006
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014251642081186146,
+ "loss": 0.5238,
+ "step": 3007
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014248224102078152,
+ "loss": 0.3887,
+ "step": 3008
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014244805517266067,
+ "loss": 0.5001,
+ "step": 3009
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001424138632723731,
+ "loss": 0.555,
+ "step": 3010
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001423796653247938,
+ "loss": 0.6137,
+ "step": 3011
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.00014234546133479867,
+ "loss": 0.8052,
+ "step": 3012
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 0.0001423112513072644,
+ "loss": 0.5392,
+ "step": 3013
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014227703524706867,
+ "loss": 0.5067,
+ "step": 3014
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.0001422428131590899,
+ "loss": 0.4016,
+ "step": 3015
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014220858504820742,
+ "loss": 0.4165,
+ "step": 3016
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014217435091930141,
+ "loss": 0.7395,
+ "step": 3017
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014214011077725292,
+ "loss": 0.4985,
+ "step": 3018
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014210586462694384,
+ "loss": 0.4821,
+ "step": 3019
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014207161247325691,
+ "loss": 0.6046,
+ "step": 3020
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014203735432107576,
+ "loss": 0.568,
+ "step": 3021
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 0.00014200309017528486,
+ "loss": 0.7383,
+ "step": 3022
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.0001419688200407695,
+ "loss": 0.5296,
+ "step": 3023
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014193454392241592,
+ "loss": 0.6391,
+ "step": 3024
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014190026182511102,
+ "loss": 0.4523,
+ "step": 3025
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.0001418659737537428,
+ "loss": 0.482,
+ "step": 3026
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014183167971319998,
+ "loss": 0.4519,
+ "step": 3027
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014179737970837207,
+ "loss": 0.4156,
+ "step": 3028
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014176307374414956,
+ "loss": 0.5142,
+ "step": 3029
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 0.00014172876182542372,
+ "loss": 0.4068,
+ "step": 3030
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014169444395708666,
+ "loss": 0.5908,
+ "step": 3031
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.0001416601201440314,
+ "loss": 0.511,
+ "step": 3032
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014162579039115174,
+ "loss": 0.5165,
+ "step": 3033
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014159145470334235,
+ "loss": 0.4449,
+ "step": 3034
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014155711308549878,
+ "loss": 0.4808,
+ "step": 3035
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014152276554251736,
+ "loss": 0.5365,
+ "step": 3036
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.00014148841207929527,
+ "loss": 0.6016,
+ "step": 3037
+ },
+ {
+ "epoch": 3.6,
+ "learning_rate": 0.0001414540527007307,
+ "loss": 0.379,
+ "step": 3038
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014141968741172238,
+ "loss": 0.6687,
+ "step": 3039
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014138531621717018,
+ "loss": 0.6219,
+ "step": 3040
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001413509391219746,
+ "loss": 0.3408,
+ "step": 3041
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014131655613103708,
+ "loss": 0.5148,
+ "step": 3042
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001412821672492599,
+ "loss": 0.3811,
+ "step": 3043
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001412477724815462,
+ "loss": 0.4691,
+ "step": 3044
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014121337183279988,
+ "loss": 0.6919,
+ "step": 3045
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.0001411789653079257,
+ "loss": 0.5804,
+ "step": 3046
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 0.00014114455291182933,
+ "loss": 0.418,
+ "step": 3047
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001411101346494172,
+ "loss": 0.4422,
+ "step": 3048
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410757105255966,
+ "loss": 0.389,
+ "step": 3049
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410412805452757,
+ "loss": 0.4083,
+ "step": 3050
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.0001410068447133634,
+ "loss": 0.8703,
+ "step": 3051
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014097240303476954,
+ "loss": 0.4724,
+ "step": 3052
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014093795551440474,
+ "loss": 0.6257,
+ "step": 3053
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014090350215718048,
+ "loss": 0.5212,
+ "step": 3054
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 0.00014086904296800902,
+ "loss": 0.4429,
+ "step": 3055
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014083457795180355,
+ "loss": 0.3496,
+ "step": 3056
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014080010711347798,
+ "loss": 0.3402,
+ "step": 3057
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.0001407656304579471,
+ "loss": 0.4783,
+ "step": 3058
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014073114799012653,
+ "loss": 0.3987,
+ "step": 3059
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014069665971493274,
+ "loss": 0.4755,
+ "step": 3060
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014066216563728303,
+ "loss": 0.4792,
+ "step": 3061
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014062766576209546,
+ "loss": 0.4275,
+ "step": 3062
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 0.00014059316009428893,
+ "loss": 0.3598,
+ "step": 3063
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014055864863878325,
+ "loss": 0.4887,
+ "step": 3064
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.000140524131400499,
+ "loss": 0.5421,
+ "step": 3065
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014048960838435753,
+ "loss": 0.352,
+ "step": 3066
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014045507959528118,
+ "loss": 0.3124,
+ "step": 3067
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014042054503819287,
+ "loss": 0.3955,
+ "step": 3068
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014038600471801658,
+ "loss": 0.455,
+ "step": 3069
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014035145863967692,
+ "loss": 0.5177,
+ "step": 3070
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014031690680809945,
+ "loss": 0.4205,
+ "step": 3071
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 0.00014028234922821054,
+ "loss": 0.4832,
+ "step": 3072
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001402477859049373,
+ "loss": 0.3496,
+ "step": 3073
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001402132168432077,
+ "loss": 0.5404,
+ "step": 3074
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014017864204795058,
+ "loss": 0.5106,
+ "step": 3075
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001401440615240955,
+ "loss": 0.6611,
+ "step": 3076
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014010947527657295,
+ "loss": 0.3879,
+ "step": 3077
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001400748833103141,
+ "loss": 0.3054,
+ "step": 3078
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.00014004028563025108,
+ "loss": 0.3461,
+ "step": 3079
+ },
+ {
+ "epoch": 3.65,
+ "learning_rate": 0.0001400056822413167,
+ "loss": 0.482,
+ "step": 3080
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.0001399710731484447,
+ "loss": 0.3285,
+ "step": 3081
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013993645835656953,
+ "loss": 0.363,
+ "step": 3082
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013990183787062661,
+ "loss": 0.5092,
+ "step": 3083
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013986721169555194,
+ "loss": 0.3009,
+ "step": 3084
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013983257983628253,
+ "loss": 0.3831,
+ "step": 3085
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.0001397979422977561,
+ "loss": 0.3718,
+ "step": 3086
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013976329908491118,
+ "loss": 0.3401,
+ "step": 3087
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 0.00013972865020268722,
+ "loss": 0.5294,
+ "step": 3088
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013969399565602435,
+ "loss": 0.5054,
+ "step": 3089
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001396593354498635,
+ "loss": 0.4247,
+ "step": 3090
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013962466958914658,
+ "loss": 0.431,
+ "step": 3091
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013958999807881604,
+ "loss": 0.6341,
+ "step": 3092
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001395553209238154,
+ "loss": 0.5126,
+ "step": 3093
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013952063812908881,
+ "loss": 0.3775,
+ "step": 3094
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.0001394859496995813,
+ "loss": 0.5149,
+ "step": 3095
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013945125564023868,
+ "loss": 0.2879,
+ "step": 3096
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 0.00013941655595600756,
+ "loss": 0.5621,
+ "step": 3097
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013938185065183532,
+ "loss": 0.408,
+ "step": 3098
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013934713973267024,
+ "loss": 0.4247,
+ "step": 3099
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001393124232034613,
+ "loss": 0.4224,
+ "step": 3100
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001392777010691584,
+ "loss": 0.4142,
+ "step": 3101
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013924297333471204,
+ "loss": 0.6004,
+ "step": 3102
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013920824000507374,
+ "loss": 0.6016,
+ "step": 3103
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.0001391735010851956,
+ "loss": 0.4669,
+ "step": 3104
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 0.00013913875658003074,
+ "loss": 0.3987,
+ "step": 3105
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001391040064945329,
+ "loss": 0.471,
+ "step": 3106
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001390692508336568,
+ "loss": 0.6135,
+ "step": 3107
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013903448960235766,
+ "loss": 0.5369,
+ "step": 3108
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013899972280559183,
+ "loss": 0.3295,
+ "step": 3109
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001389649504483162,
+ "loss": 0.309,
+ "step": 3110
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013893017253548858,
+ "loss": 0.4026,
+ "step": 3111
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00013889538907206755,
+ "loss": 0.4724,
+ "step": 3112
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.0001388606000630125,
+ "loss": 0.3606,
+ "step": 3113
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001388258055132835,
+ "loss": 0.4894,
+ "step": 3114
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001387910054278416,
+ "loss": 0.4832,
+ "step": 3115
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001387561998116484,
+ "loss": 0.4604,
+ "step": 3116
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013872138866966656,
+ "loss": 0.4377,
+ "step": 3117
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013868657200685934,
+ "loss": 0.3965,
+ "step": 3118
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.0001386517498281908,
+ "loss": 0.7653,
+ "step": 3119
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013861692213862584,
+ "loss": 0.5213,
+ "step": 3120
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013858208894313017,
+ "loss": 0.9296,
+ "step": 3121
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 0.00013854725024667016,
+ "loss": 0.7738,
+ "step": 3122
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013851240605421315,
+ "loss": 0.5826,
+ "step": 3123
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.0001384775563707271,
+ "loss": 0.5502,
+ "step": 3124
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013844270120118085,
+ "loss": 0.3535,
+ "step": 3125
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.0001384078405505439,
+ "loss": 0.4853,
+ "step": 3126
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013837297442378675,
+ "loss": 0.5819,
+ "step": 3127
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013833810282588044,
+ "loss": 0.3728,
+ "step": 3128
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.00013830322576179697,
+ "loss": 0.3327,
+ "step": 3129
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 0.000138268343236509,
+ "loss": 0.4618,
+ "step": 3130
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013823345525499004,
+ "loss": 0.3377,
+ "step": 3131
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013819856182221434,
+ "loss": 0.3154,
+ "step": 3132
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013816366294315695,
+ "loss": 0.5116,
+ "step": 3133
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.0001381287586227937,
+ "loss": 0.4987,
+ "step": 3134
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013809384886610118,
+ "loss": 0.5596,
+ "step": 3135
+ },
+ {
+ "epoch": 3.72,
+ "eval_loss": 2.939779281616211,
+ "eval_runtime": 283.9953,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 3135
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013805893367805678,
+ "loss": 0.5128,
+ "step": 3136
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.0001380240130636386,
+ "loss": 0.3149,
+ "step": 3137
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 0.00013798908702782558,
+ "loss": 0.4984,
+ "step": 3138
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.0001379541555755974,
+ "loss": 0.626,
+ "step": 3139
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013791921871193457,
+ "loss": 0.4949,
+ "step": 3140
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013788427644181823,
+ "loss": 0.5654,
+ "step": 3141
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.0001378493287702305,
+ "loss": 0.4197,
+ "step": 3142
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013781437570215406,
+ "loss": 0.4341,
+ "step": 3143
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013777941724257253,
+ "loss": 0.3576,
+ "step": 3144
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013774445339647014,
+ "loss": 0.3098,
+ "step": 3145
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013770948416883205,
+ "loss": 0.6052,
+ "step": 3146
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 0.00013767450956464407,
+ "loss": 0.4327,
+ "step": 3147
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013763952958889287,
+ "loss": 0.4717,
+ "step": 3148
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.0001376045442465657,
+ "loss": 0.5263,
+ "step": 3149
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013756955354265085,
+ "loss": 0.5021,
+ "step": 3150
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013753455748213714,
+ "loss": 0.4066,
+ "step": 3151
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013749955607001433,
+ "loss": 0.3461,
+ "step": 3152
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013746454931127278,
+ "loss": 0.4318,
+ "step": 3153
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.00013742953721090372,
+ "loss": 0.4195,
+ "step": 3154
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 0.0001373945197738991,
+ "loss": 0.3862,
+ "step": 3155
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013735949700525163,
+ "loss": 0.5916,
+ "step": 3156
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013732446890995484,
+ "loss": 0.5336,
+ "step": 3157
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013728943549300295,
+ "loss": 0.4104,
+ "step": 3158
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013725439675939095,
+ "loss": 0.541,
+ "step": 3159
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013721935271411464,
+ "loss": 0.5173,
+ "step": 3160
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013718430336217045,
+ "loss": 0.3866,
+ "step": 3161
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013714924870855571,
+ "loss": 0.6113,
+ "step": 3162
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 0.00013711418875826846,
+ "loss": 0.5817,
+ "step": 3163
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001370791235163075,
+ "loss": 0.5331,
+ "step": 3164
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013704405298767229,
+ "loss": 0.5744,
+ "step": 3165
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001370089771773632,
+ "loss": 0.494,
+ "step": 3166
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013697389609038124,
+ "loss": 0.4537,
+ "step": 3167
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013693880973172822,
+ "loss": 0.5494,
+ "step": 3168
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013690371810640665,
+ "loss": 0.537,
+ "step": 3169
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.0001368686212194199,
+ "loss": 0.4698,
+ "step": 3170
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013683351907577194,
+ "loss": 0.5254,
+ "step": 3171
+ },
+ {
+ "epoch": 3.76,
+ "learning_rate": 0.00013679841168046767,
+ "loss": 0.3857,
+ "step": 3172
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013676329903851254,
+ "loss": 0.4464,
+ "step": 3173
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.0001367281811549129,
+ "loss": 0.5651,
+ "step": 3174
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.0001366930580346758,
+ "loss": 0.4192,
+ "step": 3175
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.000136657929682809,
+ "loss": 0.3364,
+ "step": 3176
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013662279610432104,
+ "loss": 0.3539,
+ "step": 3177
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013658765730422125,
+ "loss": 0.6074,
+ "step": 3178
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013655251328751957,
+ "loss": 0.5322,
+ "step": 3179
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 0.00013651736405922686,
+ "loss": 0.4176,
+ "step": 3180
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013648220962435458,
+ "loss": 0.4878,
+ "step": 3181
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.000136447049987915,
+ "loss": 0.6351,
+ "step": 3182
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013641188515492109,
+ "loss": 0.4487,
+ "step": 3183
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001363767151303866,
+ "loss": 0.4451,
+ "step": 3184
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013634153991932607,
+ "loss": 0.4944,
+ "step": 3185
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001363063595267547,
+ "loss": 0.5932,
+ "step": 3186
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.00013627117395768833,
+ "loss": 0.4964,
+ "step": 3187
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 0.0001362359832171438,
+ "loss": 0.6795,
+ "step": 3188
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013620078731013845,
+ "loss": 0.3862,
+ "step": 3189
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001361655862416905,
+ "loss": 0.3425,
+ "step": 3190
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001361303800168188,
+ "loss": 0.4361,
+ "step": 3191
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.0001360951686405431,
+ "loss": 0.5774,
+ "step": 3192
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013605995211788365,
+ "loss": 0.4044,
+ "step": 3193
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013602473045386165,
+ "loss": 0.3858,
+ "step": 3194
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013598950365349883,
+ "loss": 0.6136,
+ "step": 3195
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013595427172181785,
+ "loss": 0.329,
+ "step": 3196
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 0.00013591903466384203,
+ "loss": 0.3898,
+ "step": 3197
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013588379248459536,
+ "loss": 0.4809,
+ "step": 3198
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013584854518910262,
+ "loss": 0.4108,
+ "step": 3199
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013581329278238927,
+ "loss": 0.4655,
+ "step": 3200
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013577803526948162,
+ "loss": 0.4657,
+ "step": 3201
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013574277265540654,
+ "loss": 0.4842,
+ "step": 3202
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013570750494519175,
+ "loss": 0.4593,
+ "step": 3203
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013567223214386564,
+ "loss": 0.435,
+ "step": 3204
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 0.00013563695425645737,
+ "loss": 0.7146,
+ "step": 3205
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013560167128799674,
+ "loss": 0.5027,
+ "step": 3206
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013556638324351442,
+ "loss": 0.4844,
+ "step": 3207
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013553109012804163,
+ "loss": 0.7605,
+ "step": 3208
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013549579194661044,
+ "loss": 0.396,
+ "step": 3209
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013546048870425356,
+ "loss": 0.5178,
+ "step": 3210
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013542518040600453,
+ "loss": 0.6946,
+ "step": 3211
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.0001353898670568975,
+ "loss": 0.5054,
+ "step": 3212
+ },
+ {
+ "epoch": 3.81,
+ "learning_rate": 0.00013535454866196739,
+ "loss": 0.4495,
+ "step": 3213
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013531922522624982,
+ "loss": 0.5138,
+ "step": 3214
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001352838967547812,
+ "loss": 0.4706,
+ "step": 3215
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013524856325259848,
+ "loss": 0.5193,
+ "step": 3216
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001352132247247396,
+ "loss": 0.4436,
+ "step": 3217
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013517788117624292,
+ "loss": 0.4139,
+ "step": 3218
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.0001351425326121478,
+ "loss": 0.5937,
+ "step": 3219
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.000135107179037494,
+ "loss": 0.3375,
+ "step": 3220
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013507182045732234,
+ "loss": 0.3712,
+ "step": 3221
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 0.00013503645687667408,
+ "loss": 0.3424,
+ "step": 3222
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013500108830059133,
+ "loss": 0.3333,
+ "step": 3223
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013496571473411688,
+ "loss": 0.4042,
+ "step": 3224
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013493033618229417,
+ "loss": 0.4963,
+ "step": 3225
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.0001348949526501675,
+ "loss": 0.3946,
+ "step": 3226
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013485956414278178,
+ "loss": 0.5807,
+ "step": 3227
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013482417066518256,
+ "loss": 0.4561,
+ "step": 3228
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013478877222241627,
+ "loss": 0.4964,
+ "step": 3229
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 0.00013475336881952986,
+ "loss": 0.6429,
+ "step": 3230
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013471796046157116,
+ "loss": 0.5466,
+ "step": 3231
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013468254715358861,
+ "loss": 0.3882,
+ "step": 3232
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013464712890063138,
+ "loss": 0.5006,
+ "step": 3233
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.0001346117057077493,
+ "loss": 0.494,
+ "step": 3234
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013457627757999303,
+ "loss": 0.5444,
+ "step": 3235
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013454084452241372,
+ "loss": 0.3714,
+ "step": 3236
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.00013450540654006348,
+ "loss": 0.3335,
+ "step": 3237
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.0001344699636379949,
+ "loss": 0.4771,
+ "step": 3238
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013443451582126144,
+ "loss": 0.466,
+ "step": 3239
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013439906309491712,
+ "loss": 0.5537,
+ "step": 3240
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013436360546401676,
+ "loss": 0.5899,
+ "step": 3241
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013432814293361584,
+ "loss": 0.443,
+ "step": 3242
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013429267550877055,
+ "loss": 0.4238,
+ "step": 3243
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013425720319453773,
+ "loss": 0.6529,
+ "step": 3244
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013422172599597505,
+ "loss": 0.6163,
+ "step": 3245
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013418624391814068,
+ "loss": 0.5183,
+ "step": 3246
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 0.00013415075696609363,
+ "loss": 0.7659,
+ "step": 3247
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001341152651448936,
+ "loss": 0.3717,
+ "step": 3248
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340797684596009,
+ "loss": 0.6885,
+ "step": 3249
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340442669152766,
+ "loss": 0.4483,
+ "step": 3250
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.0001340087605169825,
+ "loss": 0.3417,
+ "step": 3251
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013397324926978094,
+ "loss": 0.4751,
+ "step": 3252
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013393773317873508,
+ "loss": 0.4448,
+ "step": 3253
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013390221224890878,
+ "loss": 0.6278,
+ "step": 3254
+ },
+ {
+ "epoch": 3.86,
+ "learning_rate": 0.00013386668648536655,
+ "loss": 0.2995,
+ "step": 3255
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013383115589317353,
+ "loss": 0.535,
+ "step": 3256
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013379562047739568,
+ "loss": 0.4972,
+ "step": 3257
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013376008024309948,
+ "loss": 0.4821,
+ "step": 3258
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.0001337245351953523,
+ "loss": 0.392,
+ "step": 3259
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.000133688985339222,
+ "loss": 0.413,
+ "step": 3260
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013365343067977726,
+ "loss": 0.4689,
+ "step": 3261
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013361787122208744,
+ "loss": 0.4737,
+ "step": 3262
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 0.00013358230697122246,
+ "loss": 0.5033,
+ "step": 3263
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013354673793225302,
+ "loss": 0.4901,
+ "step": 3264
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013351116411025054,
+ "loss": 0.5776,
+ "step": 3265
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013347558551028702,
+ "loss": 0.5005,
+ "step": 3266
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013344000213743522,
+ "loss": 0.6475,
+ "step": 3267
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013340441399676856,
+ "loss": 0.4394,
+ "step": 3268
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001333688210933611,
+ "loss": 0.4351,
+ "step": 3269
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001333332234322876,
+ "loss": 0.4526,
+ "step": 3270
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.0001332976210186236,
+ "loss": 0.3006,
+ "step": 3271
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 0.00013326201385744518,
+ "loss": 0.382,
+ "step": 3272
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013322640195382907,
+ "loss": 0.3488,
+ "step": 3273
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013319078531285285,
+ "loss": 0.5538,
+ "step": 3274
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013315516393959463,
+ "loss": 0.5328,
+ "step": 3275
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013311953783913324,
+ "loss": 0.5216,
+ "step": 3276
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001330839070165482,
+ "loss": 0.3845,
+ "step": 3277
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001330482714769197,
+ "loss": 0.5293,
+ "step": 3278
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.00013301263122532855,
+ "loss": 0.5415,
+ "step": 3279
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 0.0001329769862668563,
+ "loss": 0.5309,
+ "step": 3280
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013294133660658516,
+ "loss": 0.4629,
+ "step": 3281
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013290568224959794,
+ "loss": 0.4329,
+ "step": 3282
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013287002320097821,
+ "loss": 0.3973,
+ "step": 3283
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.0001328343594658102,
+ "loss": 0.3417,
+ "step": 3284
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013279869104917873,
+ "loss": 0.4784,
+ "step": 3285
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013276301795616936,
+ "loss": 0.3668,
+ "step": 3286
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.0001327273401918683,
+ "loss": 0.3726,
+ "step": 3287
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 0.00013269165776136238,
+ "loss": 0.518,
+ "step": 3288
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013265597066973922,
+ "loss": 0.3864,
+ "step": 3289
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013262027892208694,
+ "loss": 0.4249,
+ "step": 3290
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013258458252349444,
+ "loss": 0.395,
+ "step": 3291
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013254888147905126,
+ "loss": 0.8359,
+ "step": 3292
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013251317579384756,
+ "loss": 0.5028,
+ "step": 3293
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.0001324774654729742,
+ "loss": 0.4216,
+ "step": 3294
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.0001324417505215227,
+ "loss": 0.6145,
+ "step": 3295
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013240603094458522,
+ "loss": 0.6158,
+ "step": 3296
+ },
+ {
+ "epoch": 3.91,
+ "learning_rate": 0.00013237030674725464,
+ "loss": 0.5101,
+ "step": 3297
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001323345779346244,
+ "loss": 0.6933,
+ "step": 3298
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001322988445117886,
+ "loss": 0.4192,
+ "step": 3299
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001322631064838422,
+ "loss": 0.4549,
+ "step": 3300
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013222736385588054,
+ "loss": 0.4947,
+ "step": 3301
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013219161663299982,
+ "loss": 0.5383,
+ "step": 3302
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013215586482029669,
+ "loss": 0.4919,
+ "step": 3303
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.0001321201084228687,
+ "loss": 0.4603,
+ "step": 3304
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 0.00013208434744581385,
+ "loss": 0.3127,
+ "step": 3305
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013204858189423097,
+ "loss": 0.754,
+ "step": 3306
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013201281177321935,
+ "loss": 0.3746,
+ "step": 3307
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013197703708787913,
+ "loss": 0.5576,
+ "step": 3308
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001319412578433109,
+ "loss": 0.4992,
+ "step": 3309
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013190547404461598,
+ "loss": 0.4533,
+ "step": 3310
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.0001318696856968965,
+ "loss": 0.4155,
+ "step": 3311
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013183389280525497,
+ "loss": 0.3661,
+ "step": 3312
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 0.00013179809537479476,
+ "loss": 0.4512,
+ "step": 3313
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013176229341061975,
+ "loss": 0.5895,
+ "step": 3314
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013172648691783454,
+ "loss": 0.3308,
+ "step": 3315
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013169067590154432,
+ "loss": 0.4128,
+ "step": 3316
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013165486036685503,
+ "loss": 0.5432,
+ "step": 3317
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001316190403188731,
+ "loss": 0.4297,
+ "step": 3318
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013158321576270575,
+ "loss": 0.4259,
+ "step": 3319
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001315473867034608,
+ "loss": 0.4428,
+ "step": 3320
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.0001315115531462466,
+ "loss": 0.6495,
+ "step": 3321
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 0.00013147571509617228,
+ "loss": 0.5706,
+ "step": 3322
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001314398725583476,
+ "loss": 0.3647,
+ "step": 3323
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001314040255378829,
+ "loss": 0.4864,
+ "step": 3324
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013136817403988917,
+ "loss": 0.4197,
+ "step": 3325
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013133231806947805,
+ "loss": 0.4818,
+ "step": 3326
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.00013129645763176184,
+ "loss": 0.4201,
+ "step": 3327
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001312605927318534,
+ "loss": 0.4352,
+ "step": 3328
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001312247233748664,
+ "loss": 0.2785,
+ "step": 3329
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 0.0001311888495659149,
+ "loss": 0.4424,
+ "step": 3330
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013115297131011382,
+ "loss": 0.4258,
+ "step": 3331
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013111708861257855,
+ "loss": 0.4332,
+ "step": 3332
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013108120147842519,
+ "loss": 0.3578,
+ "step": 3333
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.0001310453099127705,
+ "loss": 0.4219,
+ "step": 3334
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.0001310094139207318,
+ "loss": 0.5837,
+ "step": 3335
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.0001309735135074271,
+ "loss": 0.3965,
+ "step": 3336
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013093760867797502,
+ "loss": 0.4764,
+ "step": 3337
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 0.00013090169943749476,
+ "loss": 0.4933,
+ "step": 3338
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013086578579110623,
+ "loss": 0.3434,
+ "step": 3339
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001308298677439299,
+ "loss": 0.5931,
+ "step": 3340
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013079394530108695,
+ "loss": 0.442,
+ "step": 3341
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001307580184676991,
+ "loss": 0.3229,
+ "step": 3342
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001307220872488888,
+ "loss": 0.4567,
+ "step": 3343
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013068615164977895,
+ "loss": 0.6224,
+ "step": 3344
+ },
+ {
+ "epoch": 3.97,
+ "eval_loss": 2.954587936401367,
+ "eval_runtime": 283.9817,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 3344
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013065021167549322,
+ "loss": 0.6767,
+ "step": 3345
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.0001306142673311559,
+ "loss": 0.4809,
+ "step": 3346
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 0.00013057831862189187,
+ "loss": 0.4563,
+ "step": 3347
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013054236555282657,
+ "loss": 0.4674,
+ "step": 3348
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013050640812908623,
+ "loss": 0.6636,
+ "step": 3349
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013047044635579747,
+ "loss": 0.4652,
+ "step": 3350
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013043448023808773,
+ "loss": 0.3912,
+ "step": 3351
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.000130398509781085,
+ "loss": 0.6064,
+ "step": 3352
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013036253498991787,
+ "loss": 0.5975,
+ "step": 3353
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013032655586971552,
+ "loss": 0.7249,
+ "step": 3354
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 0.00013029057242560784,
+ "loss": 0.4604,
+ "step": 3355
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013025458466272525,
+ "loss": 0.4895,
+ "step": 3356
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.0001302185925861988,
+ "loss": 0.3628,
+ "step": 3357
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013018259620116025,
+ "loss": 0.4798,
+ "step": 3358
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013014659551274189,
+ "loss": 0.663,
+ "step": 3359
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013011059052607656,
+ "loss": 0.3923,
+ "step": 3360
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013007458124629785,
+ "loss": 0.5601,
+ "step": 3361
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013003856767853983,
+ "loss": 0.67,
+ "step": 3362
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00013000254982793735,
+ "loss": 0.5059,
+ "step": 3363
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012996652769962566,
+ "loss": 0.4992,
+ "step": 3364
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012993050129874082,
+ "loss": 0.6196,
+ "step": 3365
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012989447063041935,
+ "loss": 0.4157,
+ "step": 3366
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012985843569979848,
+ "loss": 0.5714,
+ "step": 3367
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.000129822396512016,
+ "loss": 0.7484,
+ "step": 3368
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012978635307221026,
+ "loss": 0.3928,
+ "step": 3369
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012975030538552032,
+ "loss": 0.4129,
+ "step": 3370
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.0001297142534570858,
+ "loss": 0.5407,
+ "step": 3371
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012967819729204684,
+ "loss": 0.479,
+ "step": 3372
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012964213689554437,
+ "loss": 0.4492,
+ "step": 3373
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012960607227271973,
+ "loss": 0.4574,
+ "step": 3374
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012957000342871502,
+ "loss": 0.7554,
+ "step": 3375
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012953393036867282,
+ "loss": 0.3038,
+ "step": 3376
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001294978530977364,
+ "loss": 0.5125,
+ "step": 3377
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001294617716210495,
+ "loss": 0.7192,
+ "step": 3378
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012942568594375667,
+ "loss": 0.4371,
+ "step": 3379
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012938959607100288,
+ "loss": 0.3672,
+ "step": 3380
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012935350200793378,
+ "loss": 0.4752,
+ "step": 3381
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.0001293174037596956,
+ "loss": 0.225,
+ "step": 3382
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012928130133143512,
+ "loss": 0.2106,
+ "step": 3383
+ },
+ {
+ "epoch": 4.0,
+ "learning_rate": 0.00012924519472829978,
+ "loss": 0.213,
+ "step": 3384
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001292090839554376,
+ "loss": 0.2775,
+ "step": 3385
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001291729690179972,
+ "loss": 0.2417,
+ "step": 3386
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.0001291368499211278,
+ "loss": 0.2212,
+ "step": 3387
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012910072666997912,
+ "loss": 0.2644,
+ "step": 3388
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012906459926970162,
+ "loss": 0.2206,
+ "step": 3389
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012902846772544624,
+ "loss": 0.2238,
+ "step": 3390
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012899233204236455,
+ "loss": 0.2212,
+ "step": 3391
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 0.00012895619222560878,
+ "loss": 0.2082,
+ "step": 3392
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012892004828033156,
+ "loss": 0.2896,
+ "step": 3393
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012888390021168636,
+ "loss": 0.2351,
+ "step": 3394
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012884774802482697,
+ "loss": 0.2263,
+ "step": 3395
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.000128811591724908,
+ "loss": 0.2243,
+ "step": 3396
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.0001287754313170845,
+ "loss": 0.2433,
+ "step": 3397
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012873926680651222,
+ "loss": 0.2566,
+ "step": 3398
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012870309819834735,
+ "loss": 0.2537,
+ "step": 3399
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00012866692549774682,
+ "loss": 0.298,
+ "step": 3400
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.000128630748709868,
+ "loss": 0.2246,
+ "step": 3401
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012859456783986893,
+ "loss": 0.2179,
+ "step": 3402
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012855838289290821,
+ "loss": 0.2394,
+ "step": 3403
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.0001285221938741451,
+ "loss": 0.2068,
+ "step": 3404
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012848600078873925,
+ "loss": 0.1961,
+ "step": 3405
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012844980364185108,
+ "loss": 0.2719,
+ "step": 3406
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012841360243864147,
+ "loss": 0.2009,
+ "step": 3407
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012837739718427196,
+ "loss": 0.2343,
+ "step": 3408
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00012834118788390456,
+ "loss": 0.3161,
+ "step": 3409
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012830497454270205,
+ "loss": 0.1992,
+ "step": 3410
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012826875716582755,
+ "loss": 0.261,
+ "step": 3411
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012823253575844495,
+ "loss": 0.2403,
+ "step": 3412
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012819631032571854,
+ "loss": 0.2271,
+ "step": 3413
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012816008087281332,
+ "loss": 0.2062,
+ "step": 3414
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012812384740489485,
+ "loss": 0.2133,
+ "step": 3415
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012808760992712924,
+ "loss": 0.2372,
+ "step": 3416
+ },
+ {
+ "epoch": 4.04,
+ "learning_rate": 0.00012805136844468309,
+ "loss": 0.2466,
+ "step": 3417
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012801512296272368,
+ "loss": 0.2456,
+ "step": 3418
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012797887348641883,
+ "loss": 0.2171,
+ "step": 3419
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012794262002093697,
+ "loss": 0.3038,
+ "step": 3420
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.000127906362571447,
+ "loss": 0.1868,
+ "step": 3421
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012787010114311844,
+ "loss": 0.2611,
+ "step": 3422
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.00012783383574112138,
+ "loss": 0.2131,
+ "step": 3423
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.0001277975663706265,
+ "loss": 0.2005,
+ "step": 3424
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.000127761293036805,
+ "loss": 0.2455,
+ "step": 3425
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.0001277250157448287,
+ "loss": 0.2837,
+ "step": 3426
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012768873449986988,
+ "loss": 0.2252,
+ "step": 3427
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012765244930710155,
+ "loss": 0.211,
+ "step": 3428
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012761616017169708,
+ "loss": 0.1831,
+ "step": 3429
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.0001275798670988306,
+ "loss": 0.1985,
+ "step": 3430
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012754357009367665,
+ "loss": 0.2341,
+ "step": 3431
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012750726916141046,
+ "loss": 0.2395,
+ "step": 3432
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.00012747096430720765,
+ "loss": 0.2183,
+ "step": 3433
+ },
+ {
+ "epoch": 4.06,
+ "learning_rate": 0.0001274346555362446,
+ "loss": 0.2698,
+ "step": 3434
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012739834285369807,
+ "loss": 0.2104,
+ "step": 3435
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001273620262647455,
+ "loss": 0.2395,
+ "step": 3436
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012732570577456484,
+ "loss": 0.2218,
+ "step": 3437
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012728938138833462,
+ "loss": 0.2337,
+ "step": 3438
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012725305311123386,
+ "loss": 0.1958,
+ "step": 3439
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001272167209484422,
+ "loss": 0.2767,
+ "step": 3440
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012718038490513984,
+ "loss": 0.2238,
+ "step": 3441
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.00012714404498650743,
+ "loss": 0.2931,
+ "step": 3442
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012710770119772632,
+ "loss": 0.3166,
+ "step": 3443
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012707135354397836,
+ "loss": 0.1985,
+ "step": 3444
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012703500203044586,
+ "loss": 0.2208,
+ "step": 3445
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.0001269986466623118,
+ "loss": 0.2279,
+ "step": 3446
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012696228744475964,
+ "loss": 0.2656,
+ "step": 3447
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012692592438297341,
+ "loss": 0.2181,
+ "step": 3448
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.00012688955748213772,
+ "loss": 0.1994,
+ "step": 3449
+ },
+ {
+ "epoch": 4.08,
+ "learning_rate": 0.0001268531867474377,
+ "loss": 0.2818,
+ "step": 3450
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012681681218405897,
+ "loss": 0.2277,
+ "step": 3451
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012678043379718782,
+ "loss": 0.2692,
+ "step": 3452
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012674405159201091,
+ "loss": 0.2664,
+ "step": 3453
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012670766557371565,
+ "loss": 0.2008,
+ "step": 3454
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012667127574748986,
+ "loss": 0.2382,
+ "step": 3455
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.0001266348821185219,
+ "loss": 0.2454,
+ "step": 3456
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.0001265984846920008,
+ "loss": 0.3547,
+ "step": 3457
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012656208347311594,
+ "loss": 0.2115,
+ "step": 3458
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.00012652567846705736,
+ "loss": 0.1929,
+ "step": 3459
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012648926967901567,
+ "loss": 0.2076,
+ "step": 3460
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012645285711418194,
+ "loss": 0.2045,
+ "step": 3461
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012641644077774776,
+ "loss": 0.2378,
+ "step": 3462
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001263800206749054,
+ "loss": 0.2674,
+ "step": 3463
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012634359681084752,
+ "loss": 0.2125,
+ "step": 3464
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00012630716919076736,
+ "loss": 0.2097,
+ "step": 3465
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001262707378198587,
+ "loss": 0.2352,
+ "step": 3466
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001262343027033159,
+ "loss": 0.2105,
+ "step": 3467
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012619786384633375,
+ "loss": 0.2207,
+ "step": 3468
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.0001261614212541077,
+ "loss": 0.304,
+ "step": 3469
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012612497493183364,
+ "loss": 0.2239,
+ "step": 3470
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012608852488470802,
+ "loss": 0.2875,
+ "step": 3471
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.0001260520711179278,
+ "loss": 0.3197,
+ "step": 3472
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012601561363669058,
+ "loss": 0.1942,
+ "step": 3473
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012597915244619428,
+ "loss": 0.2117,
+ "step": 3474
+ },
+ {
+ "epoch": 4.11,
+ "learning_rate": 0.00012594268755163754,
+ "loss": 0.2222,
+ "step": 3475
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012590621895821943,
+ "loss": 0.1871,
+ "step": 3476
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.0001258697466711396,
+ "loss": 0.2146,
+ "step": 3477
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.0001258332706955982,
+ "loss": 0.3307,
+ "step": 3478
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012579679103679592,
+ "loss": 0.2175,
+ "step": 3479
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012576030769993393,
+ "loss": 0.2976,
+ "step": 3480
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.000125723820690214,
+ "loss": 0.2031,
+ "step": 3481
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012568733001283827,
+ "loss": 0.2046,
+ "step": 3482
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.0001256508356730097,
+ "loss": 0.2642,
+ "step": 3483
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00012561433767593145,
+ "loss": 0.2088,
+ "step": 3484
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001255778360268074,
+ "loss": 0.2458,
+ "step": 3485
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001255413307308418,
+ "loss": 0.2237,
+ "step": 3486
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012550482179323963,
+ "loss": 0.2696,
+ "step": 3487
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012546830921920617,
+ "loss": 0.2078,
+ "step": 3488
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012543179301394744,
+ "loss": 0.2199,
+ "step": 3489
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001253952731826697,
+ "loss": 0.2258,
+ "step": 3490
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00012535874973057997,
+ "loss": 0.1981,
+ "step": 3491
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.0001253222226628857,
+ "loss": 0.3252,
+ "step": 3492
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012528569198479481,
+ "loss": 0.2717,
+ "step": 3493
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.0001252491577015158,
+ "loss": 0.248,
+ "step": 3494
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012521261981825768,
+ "loss": 0.2725,
+ "step": 3495
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012517607834022993,
+ "loss": 0.2203,
+ "step": 3496
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.0001251395332726426,
+ "loss": 0.2461,
+ "step": 3497
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012510298462070619,
+ "loss": 0.3018,
+ "step": 3498
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.0001250664323896317,
+ "loss": 0.2329,
+ "step": 3499
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00012502987658463075,
+ "loss": 0.221,
+ "step": 3500
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012499331721091544,
+ "loss": 0.2812,
+ "step": 3501
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012495675427369823,
+ "loss": 0.2846,
+ "step": 3502
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012492018777819226,
+ "loss": 0.2447,
+ "step": 3503
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.0001248836177296111,
+ "loss": 0.1969,
+ "step": 3504
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012484704413316878,
+ "loss": 0.2045,
+ "step": 3505
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012481046699408004,
+ "loss": 0.1862,
+ "step": 3506
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012477388631755985,
+ "loss": 0.23,
+ "step": 3507
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.0001247373021088239,
+ "loss": 0.2972,
+ "step": 3508
+ },
+ {
+ "epoch": 4.15,
+ "learning_rate": 0.00012470071437308827,
+ "loss": 0.2222,
+ "step": 3509
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012466412311556952,
+ "loss": 0.2262,
+ "step": 3510
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012462752834148486,
+ "loss": 0.3642,
+ "step": 3511
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.0001245909300560518,
+ "loss": 0.2221,
+ "step": 3512
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012455432826448862,
+ "loss": 0.2607,
+ "step": 3513
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012451772297201376,
+ "loss": 0.2396,
+ "step": 3514
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012448111418384645,
+ "loss": 0.2034,
+ "step": 3515
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012444450190520623,
+ "loss": 0.2404,
+ "step": 3516
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00012440788614131329,
+ "loss": 0.2029,
+ "step": 3517
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012437126689738816,
+ "loss": 0.2128,
+ "step": 3518
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012433464417865202,
+ "loss": 0.2857,
+ "step": 3519
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.0001242980179903264,
+ "loss": 0.2931,
+ "step": 3520
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012426138833763342,
+ "loss": 0.2319,
+ "step": 3521
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012422475522579573,
+ "loss": 0.2272,
+ "step": 3522
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012418811866003632,
+ "loss": 0.2498,
+ "step": 3523
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012415147864557884,
+ "loss": 0.1993,
+ "step": 3524
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.0001241148351876473,
+ "loss": 0.2329,
+ "step": 3525
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001240781882914663,
+ "loss": 0.2228,
+ "step": 3526
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012404153796226087,
+ "loss": 0.2228,
+ "step": 3527
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012400488420525653,
+ "loss": 0.2277,
+ "step": 3528
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001239682270256793,
+ "loss": 0.2344,
+ "step": 3529
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001239315664287558,
+ "loss": 0.2043,
+ "step": 3530
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001238949024197129,
+ "loss": 0.2143,
+ "step": 3531
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012385823500377812,
+ "loss": 0.2054,
+ "step": 3532
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.00012382156418617947,
+ "loss": 0.2191,
+ "step": 3533
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 0.0001237848899721454,
+ "loss": 0.2199,
+ "step": 3534
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012374821236690482,
+ "loss": 0.1899,
+ "step": 3535
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.0001237115313756872,
+ "loss": 0.2206,
+ "step": 3536
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012367484700372242,
+ "loss": 0.2107,
+ "step": 3537
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012363815925624087,
+ "loss": 0.1904,
+ "step": 3538
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012360146813847345,
+ "loss": 0.2259,
+ "step": 3539
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012356477365565148,
+ "loss": 0.2488,
+ "step": 3540
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012352807581300678,
+ "loss": 0.3026,
+ "step": 3541
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00012349137461577174,
+ "loss": 0.2141,
+ "step": 3542
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012345467006917907,
+ "loss": 0.2183,
+ "step": 3543
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012341796217846208,
+ "loss": 0.2978,
+ "step": 3544
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.0001233812509488545,
+ "loss": 0.2255,
+ "step": 3545
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012334453638559057,
+ "loss": 0.2209,
+ "step": 3546
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012330781849390494,
+ "loss": 0.2464,
+ "step": 3547
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012327109727903283,
+ "loss": 0.2259,
+ "step": 3548
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012323437274620983,
+ "loss": 0.209,
+ "step": 3549
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.0001231976449006721,
+ "loss": 0.2424,
+ "step": 3550
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012316091374765624,
+ "loss": 0.2162,
+ "step": 3551
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.0001231241792923993,
+ "loss": 0.2442,
+ "step": 3552
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012308744154013878,
+ "loss": 0.2061,
+ "step": 3553
+ },
+ {
+ "epoch": 4.21,
+ "eval_loss": 3.390720844268799,
+ "eval_runtime": 283.8935,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 3553
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012305070049611273,
+ "loss": 0.1838,
+ "step": 3554
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012301395616555957,
+ "loss": 0.197,
+ "step": 3555
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.0001229772085537183,
+ "loss": 0.2479,
+ "step": 3556
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012294045766582823,
+ "loss": 0.3272,
+ "step": 3557
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012290370350712937,
+ "loss": 0.2301,
+ "step": 3558
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00012286694608286197,
+ "loss": 0.2367,
+ "step": 3559
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012283018539826685,
+ "loss": 0.2419,
+ "step": 3560
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.0001227934214585853,
+ "loss": 0.2605,
+ "step": 3561
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.000122756654269059,
+ "loss": 0.2084,
+ "step": 3562
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012271988383493024,
+ "loss": 0.2414,
+ "step": 3563
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012268311016144163,
+ "loss": 0.2206,
+ "step": 3564
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.0001226463332538363,
+ "loss": 0.2012,
+ "step": 3565
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012260955311735786,
+ "loss": 0.1884,
+ "step": 3566
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00012257276975725028,
+ "loss": 0.2155,
+ "step": 3567
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.0001225359831787581,
+ "loss": 0.2375,
+ "step": 3568
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012249919338712636,
+ "loss": 0.2713,
+ "step": 3569
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012246240038760043,
+ "loss": 0.2414,
+ "step": 3570
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012242560418542612,
+ "loss": 0.2209,
+ "step": 3571
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012238880478584985,
+ "loss": 0.2318,
+ "step": 3572
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012235200219411836,
+ "loss": 0.2858,
+ "step": 3573
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.000122315196415479,
+ "loss": 0.2468,
+ "step": 3574
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00012227838745517932,
+ "loss": 0.2166,
+ "step": 3575
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001222415753184676,
+ "loss": 0.2349,
+ "step": 3576
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012220476001059238,
+ "loss": 0.2486,
+ "step": 3577
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012216794153680274,
+ "loss": 0.234,
+ "step": 3578
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012213111990234815,
+ "loss": 0.2008,
+ "step": 3579
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012209429511247864,
+ "loss": 0.2548,
+ "step": 3580
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001220574671724446,
+ "loss": 0.2562,
+ "step": 3581
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001220206360874969,
+ "loss": 0.2586,
+ "step": 3582
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.0001219838018628868,
+ "loss": 0.2428,
+ "step": 3583
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00012194696450386608,
+ "loss": 0.2159,
+ "step": 3584
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012191012401568698,
+ "loss": 0.2544,
+ "step": 3585
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.0001218732804036021,
+ "loss": 0.2396,
+ "step": 3586
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012183643367286462,
+ "loss": 0.2335,
+ "step": 3587
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012179958382872796,
+ "loss": 0.2275,
+ "step": 3588
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012176273087644619,
+ "loss": 0.2291,
+ "step": 3589
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.0001217258748212737,
+ "loss": 0.2272,
+ "step": 3590
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012168901566846535,
+ "loss": 0.2135,
+ "step": 3591
+ },
+ {
+ "epoch": 4.25,
+ "learning_rate": 0.00012165215342327648,
+ "loss": 0.22,
+ "step": 3592
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012161528809096285,
+ "loss": 0.2577,
+ "step": 3593
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012157841967678063,
+ "loss": 0.2006,
+ "step": 3594
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012154154818598647,
+ "loss": 0.2322,
+ "step": 3595
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.0001215046736238374,
+ "loss": 0.196,
+ "step": 3596
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012146779599559095,
+ "loss": 0.2267,
+ "step": 3597
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012143091530650508,
+ "loss": 0.2416,
+ "step": 3598
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012139403156183817,
+ "loss": 0.2585,
+ "step": 3599
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00012135714476684903,
+ "loss": 0.2644,
+ "step": 3600
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012132025492679693,
+ "loss": 0.2355,
+ "step": 3601
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012128336204694148,
+ "loss": 0.2363,
+ "step": 3602
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012124646613254291,
+ "loss": 0.2476,
+ "step": 3603
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.0001212095671888617,
+ "loss": 0.2185,
+ "step": 3604
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012117266522115889,
+ "loss": 0.2233,
+ "step": 3605
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012113576023469582,
+ "loss": 0.2084,
+ "step": 3606
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012109885223473439,
+ "loss": 0.2439,
+ "step": 3607
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012106194122653684,
+ "loss": 0.2409,
+ "step": 3608
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00012102502721536595,
+ "loss": 0.2183,
+ "step": 3609
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012098811020648475,
+ "loss": 0.2595,
+ "step": 3610
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012095119020515691,
+ "loss": 0.2135,
+ "step": 3611
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.0001209142672166463,
+ "loss": 0.2125,
+ "step": 3612
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012087734124621742,
+ "loss": 0.2017,
+ "step": 3613
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012084041229913505,
+ "loss": 0.2163,
+ "step": 3614
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012080348038066452,
+ "loss": 0.2198,
+ "step": 3615
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012076654549607145,
+ "loss": 0.2234,
+ "step": 3616
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00012072960765062197,
+ "loss": 0.2201,
+ "step": 3617
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012069266684958265,
+ "loss": 0.245,
+ "step": 3618
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012065572309822037,
+ "loss": 0.2067,
+ "step": 3619
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012061877640180255,
+ "loss": 0.2284,
+ "step": 3620
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.000120581826765597,
+ "loss": 0.2323,
+ "step": 3621
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012054487419487188,
+ "loss": 0.2162,
+ "step": 3622
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012050791869489586,
+ "loss": 0.2131,
+ "step": 3623
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012047096027093798,
+ "loss": 0.2168,
+ "step": 3624
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.00012043399892826768,
+ "loss": 0.2293,
+ "step": 3625
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012039703467215488,
+ "loss": 0.2202,
+ "step": 3626
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012036006750786985,
+ "loss": 0.2288,
+ "step": 3627
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012032309744068334,
+ "loss": 0.2606,
+ "step": 3628
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012028612447586643,
+ "loss": 0.2754,
+ "step": 3629
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012024914861869063,
+ "loss": 0.239,
+ "step": 3630
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012021216987442798,
+ "loss": 0.2312,
+ "step": 3631
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012017518824835077,
+ "loss": 0.2299,
+ "step": 3632
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012013820374573184,
+ "loss": 0.2214,
+ "step": 3633
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00012010121637184428,
+ "loss": 0.2492,
+ "step": 3634
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00012006422613196178,
+ "loss": 0.2659,
+ "step": 3635
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00012002723303135826,
+ "loss": 0.23,
+ "step": 3636
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011999023707530819,
+ "loss": 0.287,
+ "step": 3637
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011995323826908635,
+ "loss": 0.2204,
+ "step": 3638
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011991623661796798,
+ "loss": 0.2277,
+ "step": 3639
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011987923212722872,
+ "loss": 0.2436,
+ "step": 3640
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011984222480214456,
+ "loss": 0.2074,
+ "step": 3641
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.00011980521464799198,
+ "loss": 0.2212,
+ "step": 3642
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011976820167004779,
+ "loss": 0.2147,
+ "step": 3643
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011973118587358928,
+ "loss": 0.2271,
+ "step": 3644
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011969416726389404,
+ "loss": 0.2498,
+ "step": 3645
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011965714584624012,
+ "loss": 0.2171,
+ "step": 3646
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011962012162590601,
+ "loss": 0.2276,
+ "step": 3647
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011958309460817052,
+ "loss": 0.2089,
+ "step": 3648
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011954606479831291,
+ "loss": 0.2691,
+ "step": 3649
+ },
+ {
+ "epoch": 4.32,
+ "learning_rate": 0.00011950903220161285,
+ "loss": 0.2229,
+ "step": 3650
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011947199682335031,
+ "loss": 0.2315,
+ "step": 3651
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001194349586688058,
+ "loss": 0.2208,
+ "step": 3652
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001193979177432601,
+ "loss": 0.2159,
+ "step": 3653
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011936087405199446,
+ "loss": 0.2781,
+ "step": 3654
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011932382760029049,
+ "loss": 0.2142,
+ "step": 3655
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011928677839343026,
+ "loss": 0.2275,
+ "step": 3656
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001192497264366961,
+ "loss": 0.2718,
+ "step": 3657
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011921267173537086,
+ "loss": 0.1947,
+ "step": 3658
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00011917561429473771,
+ "loss": 0.2361,
+ "step": 3659
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011913855412008023,
+ "loss": 0.1999,
+ "step": 3660
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011910149121668241,
+ "loss": 0.2199,
+ "step": 3661
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011906442558982865,
+ "loss": 0.2217,
+ "step": 3662
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.0001190273572448036,
+ "loss": 0.2263,
+ "step": 3663
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011899028618689247,
+ "loss": 0.2216,
+ "step": 3664
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011895321242138075,
+ "loss": 0.2298,
+ "step": 3665
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.0001189161359535544,
+ "loss": 0.2332,
+ "step": 3666
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00011887905678869966,
+ "loss": 0.2955,
+ "step": 3667
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011884197493210328,
+ "loss": 0.2352,
+ "step": 3668
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011880489038905223,
+ "loss": 0.2104,
+ "step": 3669
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011876780316483401,
+ "loss": 0.2897,
+ "step": 3670
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011873071326473644,
+ "loss": 0.2041,
+ "step": 3671
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011869362069404775,
+ "loss": 0.2242,
+ "step": 3672
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.0001186565254580565,
+ "loss": 0.2015,
+ "step": 3673
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011861942756205169,
+ "loss": 0.2716,
+ "step": 3674
+ },
+ {
+ "epoch": 4.35,
+ "learning_rate": 0.00011858232701132264,
+ "loss": 0.2504,
+ "step": 3675
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011854522381115908,
+ "loss": 0.1846,
+ "step": 3676
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011850811796685117,
+ "loss": 0.207,
+ "step": 3677
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011847100948368937,
+ "loss": 0.2228,
+ "step": 3678
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011843389836696447,
+ "loss": 0.2365,
+ "step": 3679
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011839678462196784,
+ "loss": 0.2159,
+ "step": 3680
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011835966825399096,
+ "loss": 0.2413,
+ "step": 3681
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011832254926832586,
+ "loss": 0.2596,
+ "step": 3682
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011828542767026493,
+ "loss": 0.2041,
+ "step": 3683
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00011824830346510089,
+ "loss": 0.2512,
+ "step": 3684
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011821117665812682,
+ "loss": 0.2165,
+ "step": 3685
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011817404725463618,
+ "loss": 0.2125,
+ "step": 3686
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011813691525992286,
+ "loss": 0.2557,
+ "step": 3687
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011809978067928102,
+ "loss": 0.2088,
+ "step": 3688
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011806264351800526,
+ "loss": 0.2093,
+ "step": 3689
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.0001180255037813906,
+ "loss": 0.2217,
+ "step": 3690
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011798836147473225,
+ "loss": 0.2681,
+ "step": 3691
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 0.00011795121660332593,
+ "loss": 0.2257,
+ "step": 3692
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.0001179140691724677,
+ "loss": 0.2422,
+ "step": 3693
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011787691918745396,
+ "loss": 0.3328,
+ "step": 3694
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.0001178397666535815,
+ "loss": 0.233,
+ "step": 3695
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011780261157614747,
+ "loss": 0.243,
+ "step": 3696
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011776545396044936,
+ "loss": 0.2089,
+ "step": 3697
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011772829381178502,
+ "loss": 0.2143,
+ "step": 3698
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011769113113545267,
+ "loss": 0.2135,
+ "step": 3699
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00011765396593675097,
+ "loss": 0.2403,
+ "step": 3700
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011761679822097877,
+ "loss": 0.2182,
+ "step": 3701
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011757962799343547,
+ "loss": 0.2159,
+ "step": 3702
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011754245525942065,
+ "loss": 0.2098,
+ "step": 3703
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011750528002423437,
+ "loss": 0.2264,
+ "step": 3704
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.000117468102293177,
+ "loss": 0.2023,
+ "step": 3705
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011743092207154929,
+ "loss": 0.2978,
+ "step": 3706
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.0001173937393646523,
+ "loss": 0.2311,
+ "step": 3707
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.0001173565541777875,
+ "loss": 0.244,
+ "step": 3708
+ },
+ {
+ "epoch": 4.39,
+ "learning_rate": 0.00011731936651625668,
+ "loss": 0.2058,
+ "step": 3709
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011728217638536197,
+ "loss": 0.3039,
+ "step": 3710
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011724498379040587,
+ "loss": 0.2142,
+ "step": 3711
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.0001172077887366913,
+ "loss": 0.2262,
+ "step": 3712
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011717059122952136,
+ "loss": 0.2304,
+ "step": 3713
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011713339127419969,
+ "loss": 0.2093,
+ "step": 3714
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011709618887603014,
+ "loss": 0.2083,
+ "step": 3715
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011705898404031697,
+ "loss": 0.3559,
+ "step": 3716
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00011702177677236479,
+ "loss": 0.2728,
+ "step": 3717
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011698456707747854,
+ "loss": 0.246,
+ "step": 3718
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011694735496096354,
+ "loss": 0.2031,
+ "step": 3719
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011691014042812536,
+ "loss": 0.2049,
+ "step": 3720
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011687292348427004,
+ "loss": 0.248,
+ "step": 3721
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011683570413470383,
+ "loss": 0.2189,
+ "step": 3722
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011679848238473352,
+ "loss": 0.2302,
+ "step": 3723
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011676125823966602,
+ "loss": 0.2839,
+ "step": 3724
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00011672403170480872,
+ "loss": 0.2359,
+ "step": 3725
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011668680278546929,
+ "loss": 0.2288,
+ "step": 3726
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.0001166495714869558,
+ "loss": 0.2718,
+ "step": 3727
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011661233781457654,
+ "loss": 0.1967,
+ "step": 3728
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011657510177364032,
+ "loss": 0.2098,
+ "step": 3729
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011653786336945614,
+ "loss": 0.2466,
+ "step": 3730
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011650062260733339,
+ "loss": 0.2207,
+ "step": 3731
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011646337949258175,
+ "loss": 0.2124,
+ "step": 3732
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011642613403051133,
+ "loss": 0.213,
+ "step": 3733
+ },
+ {
+ "epoch": 4.42,
+ "learning_rate": 0.00011638888622643249,
+ "loss": 0.2276,
+ "step": 3734
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.000116351636085656,
+ "loss": 0.2206,
+ "step": 3735
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011631438361349287,
+ "loss": 0.2382,
+ "step": 3736
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011627712881525452,
+ "loss": 0.2264,
+ "step": 3737
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011623987169625261,
+ "loss": 0.2392,
+ "step": 3738
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011620261226179927,
+ "loss": 0.2139,
+ "step": 3739
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011616535051720685,
+ "loss": 0.2103,
+ "step": 3740
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011612808646778806,
+ "loss": 0.211,
+ "step": 3741
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.00011609082011885592,
+ "loss": 0.2227,
+ "step": 3742
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011605355147572387,
+ "loss": 0.2459,
+ "step": 3743
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011601628054370553,
+ "loss": 0.2312,
+ "step": 3744
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011597900732811496,
+ "loss": 0.2244,
+ "step": 3745
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011594173183426647,
+ "loss": 0.2168,
+ "step": 3746
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011590445406747479,
+ "loss": 0.2711,
+ "step": 3747
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011586717403305487,
+ "loss": 0.1865,
+ "step": 3748
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011582989173632206,
+ "loss": 0.3104,
+ "step": 3749
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 0.00011579260718259197,
+ "loss": 0.2245,
+ "step": 3750
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011575532037718057,
+ "loss": 0.2316,
+ "step": 3751
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011571803132540418,
+ "loss": 0.2328,
+ "step": 3752
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011568074003257938,
+ "loss": 0.267,
+ "step": 3753
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.0001156434465040231,
+ "loss": 0.2131,
+ "step": 3754
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.0001156061507450526,
+ "loss": 0.1945,
+ "step": 3755
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011556885276098536,
+ "loss": 0.2344,
+ "step": 3756
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011553155255713937,
+ "loss": 0.2221,
+ "step": 3757
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011549425013883275,
+ "loss": 0.2098,
+ "step": 3758
+ },
+ {
+ "epoch": 4.45,
+ "learning_rate": 0.00011545694551138409,
+ "loss": 0.2329,
+ "step": 3759
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011541963868011212,
+ "loss": 0.2187,
+ "step": 3760
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011538232965033601,
+ "loss": 0.1928,
+ "step": 3761
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011534501842737527,
+ "loss": 0.2103,
+ "step": 3762
+ },
+ {
+ "epoch": 4.46,
+ "eval_loss": 3.44382643699646,
+ "eval_runtime": 283.899,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 3762
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011530770501654959,
+ "loss": 0.2563,
+ "step": 3763
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011527038942317911,
+ "loss": 0.1922,
+ "step": 3764
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011523307165258419,
+ "loss": 0.2246,
+ "step": 3765
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011519575171008552,
+ "loss": 0.2243,
+ "step": 3766
+ },
+ {
+ "epoch": 4.46,
+ "learning_rate": 0.00011515842960100411,
+ "loss": 0.2481,
+ "step": 3767
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011512110533066132,
+ "loss": 0.2135,
+ "step": 3768
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011508377890437874,
+ "loss": 0.2019,
+ "step": 3769
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011504645032747832,
+ "loss": 0.2537,
+ "step": 3770
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011500911960528229,
+ "loss": 0.2131,
+ "step": 3771
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011497178674311317,
+ "loss": 0.2421,
+ "step": 3772
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011493445174629386,
+ "loss": 0.2012,
+ "step": 3773
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011489711462014751,
+ "loss": 0.2144,
+ "step": 3774
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 0.00011485977536999757,
+ "loss": 0.2411,
+ "step": 3775
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011482243400116779,
+ "loss": 0.192,
+ "step": 3776
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011478509051898225,
+ "loss": 0.2245,
+ "step": 3777
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011474774492876532,
+ "loss": 0.241,
+ "step": 3778
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011471039723584162,
+ "loss": 0.2172,
+ "step": 3779
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011467304744553618,
+ "loss": 0.2308,
+ "step": 3780
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011463569556317424,
+ "loss": 0.2523,
+ "step": 3781
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011459834159408137,
+ "loss": 0.216,
+ "step": 3782
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011456098554358342,
+ "loss": 0.2098,
+ "step": 3783
+ },
+ {
+ "epoch": 4.48,
+ "learning_rate": 0.00011452362741700655,
+ "loss": 0.2101,
+ "step": 3784
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011448626721967717,
+ "loss": 0.3598,
+ "step": 3785
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011444890495692213,
+ "loss": 0.2131,
+ "step": 3786
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011441154063406841,
+ "loss": 0.3067,
+ "step": 3787
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011437417425644337,
+ "loss": 0.2866,
+ "step": 3788
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011433680582937461,
+ "loss": 0.2688,
+ "step": 3789
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011429943535819005,
+ "loss": 0.2286,
+ "step": 3790
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011426206284821792,
+ "loss": 0.215,
+ "step": 3791
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 0.00011422468830478679,
+ "loss": 0.2293,
+ "step": 3792
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011418731173322532,
+ "loss": 0.2614,
+ "step": 3793
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011414993313886272,
+ "loss": 0.2223,
+ "step": 3794
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011411255252702829,
+ "loss": 0.2415,
+ "step": 3795
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011407516990305169,
+ "loss": 0.2429,
+ "step": 3796
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.0001140377852722629,
+ "loss": 0.2862,
+ "step": 3797
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011400039863999214,
+ "loss": 0.2399,
+ "step": 3798
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011396301001156992,
+ "loss": 0.915,
+ "step": 3799
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 0.00011392561939232706,
+ "loss": 0.2398,
+ "step": 3800
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011388822678759464,
+ "loss": 0.2817,
+ "step": 3801
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011385083220270401,
+ "loss": 0.2224,
+ "step": 3802
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011381343564298683,
+ "loss": 0.2319,
+ "step": 3803
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011377603711377504,
+ "loss": 0.2269,
+ "step": 3804
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011373863662040087,
+ "loss": 0.2552,
+ "step": 3805
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011370123416819682,
+ "loss": 0.2335,
+ "step": 3806
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011366382976249564,
+ "loss": 0.2197,
+ "step": 3807
+ },
+ {
+ "epoch": 4.51,
+ "learning_rate": 0.00011362642340863034,
+ "loss": 0.2433,
+ "step": 3808
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011358901511193431,
+ "loss": 0.2135,
+ "step": 3809
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.0001135516048777412,
+ "loss": 0.2488,
+ "step": 3810
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.0001135141927113848,
+ "loss": 0.2426,
+ "step": 3811
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.0001134767786181993,
+ "loss": 0.247,
+ "step": 3812
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011343936260351913,
+ "loss": 0.2235,
+ "step": 3813
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011340194467267901,
+ "loss": 0.2109,
+ "step": 3814
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011336452483101394,
+ "loss": 0.2545,
+ "step": 3815
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011332710308385914,
+ "loss": 0.2104,
+ "step": 3816
+ },
+ {
+ "epoch": 4.52,
+ "learning_rate": 0.00011328967943655016,
+ "loss": 0.2089,
+ "step": 3817
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011325225389442277,
+ "loss": 0.2658,
+ "step": 3818
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011321482646281301,
+ "loss": 0.2736,
+ "step": 3819
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011317739714705731,
+ "loss": 0.2562,
+ "step": 3820
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011313996595249219,
+ "loss": 0.2223,
+ "step": 3821
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011310253288445456,
+ "loss": 0.2212,
+ "step": 3822
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011306509794828153,
+ "loss": 0.2217,
+ "step": 3823
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011302766114931054,
+ "loss": 0.2321,
+ "step": 3824
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 0.00011299022249287922,
+ "loss": 0.2423,
+ "step": 3825
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011295278198432557,
+ "loss": 0.2651,
+ "step": 3826
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.0001129153396289877,
+ "loss": 0.2256,
+ "step": 3827
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011287789543220417,
+ "loss": 0.2656,
+ "step": 3828
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011284044939931364,
+ "loss": 0.2332,
+ "step": 3829
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.0001128030015356551,
+ "loss": 0.2121,
+ "step": 3830
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011276555184656783,
+ "loss": 0.2148,
+ "step": 3831
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011272810033739135,
+ "loss": 0.234,
+ "step": 3832
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 0.00011269064701346534,
+ "loss": 0.2466,
+ "step": 3833
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011265319188012994,
+ "loss": 0.2008,
+ "step": 3834
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011261573494272538,
+ "loss": 0.1905,
+ "step": 3835
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011257827620659216,
+ "loss": 0.2515,
+ "step": 3836
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011254081567707115,
+ "loss": 0.2579,
+ "step": 3837
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011250335335950342,
+ "loss": 0.2598,
+ "step": 3838
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011246588925923018,
+ "loss": 0.2399,
+ "step": 3839
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011242842338159309,
+ "loss": 0.2181,
+ "step": 3840
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.0001123909557319339,
+ "loss": 0.2744,
+ "step": 3841
+ },
+ {
+ "epoch": 4.55,
+ "learning_rate": 0.00011235348631559473,
+ "loss": 0.2149,
+ "step": 3842
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011231601513791786,
+ "loss": 0.2184,
+ "step": 3843
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.0001122785422042459,
+ "loss": 0.2098,
+ "step": 3844
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011224106751992163,
+ "loss": 0.2277,
+ "step": 3845
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011220359109028815,
+ "loss": 0.2571,
+ "step": 3846
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011216611292068881,
+ "loss": 0.2087,
+ "step": 3847
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.0001121286330164671,
+ "loss": 0.2497,
+ "step": 3848
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.00011209115138296693,
+ "loss": 0.1869,
+ "step": 3849
+ },
+ {
+ "epoch": 4.56,
+ "learning_rate": 0.0001120536680255323,
+ "loss": 0.239,
+ "step": 3850
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011201618294950756,
+ "loss": 0.2018,
+ "step": 3851
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011197869616023722,
+ "loss": 0.2751,
+ "step": 3852
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011194120766306611,
+ "loss": 0.2526,
+ "step": 3853
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011190371746333923,
+ "loss": 0.2657,
+ "step": 3854
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011186622556640194,
+ "loss": 0.2659,
+ "step": 3855
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011182873197759971,
+ "loss": 0.2401,
+ "step": 3856
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011179123670227833,
+ "loss": 0.2299,
+ "step": 3857
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 0.00011175373974578378,
+ "loss": 0.2249,
+ "step": 3858
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011171624111346232,
+ "loss": 0.2457,
+ "step": 3859
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011167874081066045,
+ "loss": 0.192,
+ "step": 3860
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011164123884272493,
+ "loss": 0.2591,
+ "step": 3861
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011160373521500264,
+ "loss": 0.2632,
+ "step": 3862
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011156622993284084,
+ "loss": 0.248,
+ "step": 3863
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011152872300158694,
+ "loss": 0.2071,
+ "step": 3864
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011149121442658861,
+ "loss": 0.2935,
+ "step": 3865
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011145370421319377,
+ "loss": 0.2191,
+ "step": 3866
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.00011141619236675056,
+ "loss": 0.2737,
+ "step": 3867
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011137867889260734,
+ "loss": 0.2281,
+ "step": 3868
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011134116379611273,
+ "loss": 0.2083,
+ "step": 3869
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011130364708261552,
+ "loss": 0.2079,
+ "step": 3870
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011126612875746479,
+ "loss": 0.2423,
+ "step": 3871
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011122860882600986,
+ "loss": 0.1903,
+ "step": 3872
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011119108729360026,
+ "loss": 0.1995,
+ "step": 3873
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.0001111535641655857,
+ "loss": 0.2479,
+ "step": 3874
+ },
+ {
+ "epoch": 4.59,
+ "learning_rate": 0.00011111603944731623,
+ "loss": 0.198,
+ "step": 3875
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011107851314414197,
+ "loss": 0.2242,
+ "step": 3876
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.0001110409852614134,
+ "loss": 0.29,
+ "step": 3877
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011100345580448118,
+ "loss": 0.1931,
+ "step": 3878
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011096592477869616,
+ "loss": 0.2195,
+ "step": 3879
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.0001109283921894095,
+ "loss": 0.2383,
+ "step": 3880
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011089085804197248,
+ "loss": 0.2729,
+ "step": 3881
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011085332234173664,
+ "loss": 0.1836,
+ "step": 3882
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 0.00011081578509405382,
+ "loss": 0.2724,
+ "step": 3883
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011077824630427594,
+ "loss": 0.2027,
+ "step": 3884
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011074070597775527,
+ "loss": 0.2681,
+ "step": 3885
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011070316411984421,
+ "loss": 0.205,
+ "step": 3886
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.0001106656207358954,
+ "loss": 0.3106,
+ "step": 3887
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011062807583126172,
+ "loss": 0.2126,
+ "step": 3888
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011059052941129628,
+ "loss": 0.4017,
+ "step": 3889
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011055298148135236,
+ "loss": 0.2406,
+ "step": 3890
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011051543204678348,
+ "loss": 0.2833,
+ "step": 3891
+ },
+ {
+ "epoch": 4.61,
+ "learning_rate": 0.00011047788111294333,
+ "loss": 0.2224,
+ "step": 3892
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.0001104403286851859,
+ "loss": 0.3536,
+ "step": 3893
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011040277476886533,
+ "loss": 0.2373,
+ "step": 3894
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011036521936933604,
+ "loss": 0.2297,
+ "step": 3895
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011032766249195252,
+ "loss": 0.1979,
+ "step": 3896
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011029010414206965,
+ "loss": 0.2434,
+ "step": 3897
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011025254432504233,
+ "loss": 0.2897,
+ "step": 3898
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011021498304622586,
+ "loss": 0.2121,
+ "step": 3899
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 0.00011017742031097563,
+ "loss": 0.3021,
+ "step": 3900
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00011013985612464726,
+ "loss": 0.2463,
+ "step": 3901
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.0001101022904925966,
+ "loss": 0.3078,
+ "step": 3902
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00011006472342017966,
+ "loss": 0.3664,
+ "step": 3903
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.0001100271549127527,
+ "loss": 0.2176,
+ "step": 3904
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.0001099895849756722,
+ "loss": 0.2137,
+ "step": 3905
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00010995201361429474,
+ "loss": 0.2588,
+ "step": 3906
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00010991444083397728,
+ "loss": 0.2686,
+ "step": 3907
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 0.00010987686664007679,
+ "loss": 0.2235,
+ "step": 3908
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010983929103795059,
+ "loss": 0.2602,
+ "step": 3909
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.0001098017140329561,
+ "loss": 0.1857,
+ "step": 3910
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010976413563045094,
+ "loss": 0.2307,
+ "step": 3911
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010972655583579308,
+ "loss": 0.2658,
+ "step": 3912
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010968897465434051,
+ "loss": 0.2106,
+ "step": 3913
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010965139209145152,
+ "loss": 0.2122,
+ "step": 3914
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010961380815248454,
+ "loss": 0.2433,
+ "step": 3915
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.0001095762228427982,
+ "loss": 0.2032,
+ "step": 3916
+ },
+ {
+ "epoch": 4.64,
+ "learning_rate": 0.00010953863616775138,
+ "loss": 0.3393,
+ "step": 3917
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010950104813270314,
+ "loss": 0.2476,
+ "step": 3918
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010946345874301264,
+ "loss": 0.1929,
+ "step": 3919
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.0001094258680040394,
+ "loss": 0.2509,
+ "step": 3920
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010938827592114294,
+ "loss": 0.2103,
+ "step": 3921
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010935068249968314,
+ "loss": 0.2297,
+ "step": 3922
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010931308774501998,
+ "loss": 0.2259,
+ "step": 3923
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010927549166251368,
+ "loss": 0.238,
+ "step": 3924
+ },
+ {
+ "epoch": 4.65,
+ "learning_rate": 0.00010923789425752456,
+ "loss": 0.3147,
+ "step": 3925
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010920029553541326,
+ "loss": 0.2753,
+ "step": 3926
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010916269550154048,
+ "loss": 0.2399,
+ "step": 3927
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.0001091250941612672,
+ "loss": 0.2196,
+ "step": 3928
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010908749151995452,
+ "loss": 0.2326,
+ "step": 3929
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.0001090498875829638,
+ "loss": 0.2217,
+ "step": 3930
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010901228235565651,
+ "loss": 0.2012,
+ "step": 3931
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010897467584339434,
+ "loss": 0.2018,
+ "step": 3932
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 0.00010893706805153915,
+ "loss": 0.2382,
+ "step": 3933
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.000108899458985453,
+ "loss": 0.2202,
+ "step": 3934
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010886184865049813,
+ "loss": 0.2038,
+ "step": 3935
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010882423705203698,
+ "loss": 0.2406,
+ "step": 3936
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010878662419543206,
+ "loss": 0.2393,
+ "step": 3937
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010874901008604623,
+ "loss": 0.2626,
+ "step": 3938
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010871139472924237,
+ "loss": 0.246,
+ "step": 3939
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010867377813038366,
+ "loss": 0.2228,
+ "step": 3940
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010863616029483339,
+ "loss": 0.2091,
+ "step": 3941
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 0.00010859854122795508,
+ "loss": 0.215,
+ "step": 3942
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.0001085609209351123,
+ "loss": 0.2071,
+ "step": 3943
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010852329942166894,
+ "loss": 0.2208,
+ "step": 3944
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010848567669298901,
+ "loss": 0.1988,
+ "step": 3945
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010844805275443673,
+ "loss": 0.2129,
+ "step": 3946
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010841042761137634,
+ "loss": 0.3038,
+ "step": 3947
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010837280126917248,
+ "loss": 0.206,
+ "step": 3948
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010833517373318975,
+ "loss": 0.2648,
+ "step": 3949
+ },
+ {
+ "epoch": 4.68,
+ "learning_rate": 0.00010829754500879308,
+ "loss": 0.2136,
+ "step": 3950
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.0001082599151013475,
+ "loss": 0.3746,
+ "step": 3951
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010822228401621819,
+ "loss": 0.2403,
+ "step": 3952
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010818465175877052,
+ "loss": 0.2288,
+ "step": 3953
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.0001081470183343701,
+ "loss": 0.2099,
+ "step": 3954
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010810938374838251,
+ "loss": 0.1992,
+ "step": 3955
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.0001080717480061737,
+ "loss": 0.2337,
+ "step": 3956
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010803411111310971,
+ "loss": 0.2127,
+ "step": 3957
+ },
+ {
+ "epoch": 4.69,
+ "learning_rate": 0.00010799647307455674,
+ "loss": 0.2936,
+ "step": 3958
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010795883389588111,
+ "loss": 0.3019,
+ "step": 3959
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010792119358244939,
+ "loss": 0.2262,
+ "step": 3960
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010788355213962825,
+ "loss": 0.2561,
+ "step": 3961
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.0001078459095727845,
+ "loss": 0.2992,
+ "step": 3962
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.0001078082658872852,
+ "loss": 0.2081,
+ "step": 3963
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010777062108849756,
+ "loss": 0.2089,
+ "step": 3964
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010773297518178881,
+ "loss": 0.2112,
+ "step": 3965
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010769532817252653,
+ "loss": 0.1898,
+ "step": 3966
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 0.00010765768006607826,
+ "loss": 0.3229,
+ "step": 3967
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010762003086781185,
+ "loss": 0.2241,
+ "step": 3968
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010758238058309527,
+ "loss": 0.2814,
+ "step": 3969
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010754472921729661,
+ "loss": 0.2403,
+ "step": 3970
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010750707677578413,
+ "loss": 0.2715,
+ "step": 3971
+ },
+ {
+ "epoch": 4.71,
+ "eval_loss": 3.3954412937164307,
+ "eval_runtime": 283.9122,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 3971
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010746942326392628,
+ "loss": 0.2263,
+ "step": 3972
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010743176868709157,
+ "loss": 0.2433,
+ "step": 3973
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.0001073941130506488,
+ "loss": 0.2871,
+ "step": 3974
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 0.00010735645635996676,
+ "loss": 0.2416,
+ "step": 3975
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.0001073187986204145,
+ "loss": 0.2563,
+ "step": 3976
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010728113983736126,
+ "loss": 0.2502,
+ "step": 3977
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010724348001617625,
+ "loss": 0.2145,
+ "step": 3978
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.000107205819162229,
+ "loss": 0.2639,
+ "step": 3979
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010716815728088912,
+ "loss": 0.2279,
+ "step": 3980
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.0001071304943775264,
+ "loss": 0.2086,
+ "step": 3981
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.00010709283045751069,
+ "loss": 0.2142,
+ "step": 3982
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.0001070551655262121,
+ "loss": 0.2381,
+ "step": 3983
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010701749958900078,
+ "loss": 0.2313,
+ "step": 3984
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.0001069798326512471,
+ "loss": 0.1954,
+ "step": 3985
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010694216471832152,
+ "loss": 0.2253,
+ "step": 3986
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010690449579559469,
+ "loss": 0.2104,
+ "step": 3987
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010686682588843737,
+ "loss": 0.2172,
+ "step": 3988
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010682915500222051,
+ "loss": 0.2094,
+ "step": 3989
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010679148314231504,
+ "loss": 0.2885,
+ "step": 3990
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010675381031409225,
+ "loss": 0.3085,
+ "step": 3991
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 0.00010671613652292343,
+ "loss": 0.2515,
+ "step": 3992
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010667846177418003,
+ "loss": 0.2314,
+ "step": 3993
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010664078607323367,
+ "loss": 0.2473,
+ "step": 3994
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010660310942545608,
+ "loss": 0.2283,
+ "step": 3995
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010656543183621912,
+ "loss": 0.226,
+ "step": 3996
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010652775331089477,
+ "loss": 0.2169,
+ "step": 3997
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010649007385485519,
+ "loss": 0.2079,
+ "step": 3998
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010645239347347269,
+ "loss": 0.2437,
+ "step": 3999
+ },
+ {
+ "epoch": 4.74,
+ "learning_rate": 0.00010641471217211958,
+ "loss": 0.2127,
+ "step": 4000
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010637702995616847,
+ "loss": 0.2527,
+ "step": 4001
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010633934683099196,
+ "loss": 0.2193,
+ "step": 4002
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.0001063016628019629,
+ "loss": 0.2744,
+ "step": 4003
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010626397787445416,
+ "loss": 0.2592,
+ "step": 4004
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010622629205383885,
+ "loss": 0.2107,
+ "step": 4005
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010618860534549006,
+ "loss": 0.1956,
+ "step": 4006
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010615091775478117,
+ "loss": 0.2546,
+ "step": 4007
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 0.00010611322928708555,
+ "loss": 0.2376,
+ "step": 4008
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010607553994777684,
+ "loss": 0.2359,
+ "step": 4009
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010603784974222861,
+ "loss": 0.2631,
+ "step": 4010
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010600015867581474,
+ "loss": 0.2602,
+ "step": 4011
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010596246675390911,
+ "loss": 0.2043,
+ "step": 4012
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010592477398188575,
+ "loss": 0.2325,
+ "step": 4013
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.0001058870803651189,
+ "loss": 0.2395,
+ "step": 4014
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010584938590898281,
+ "loss": 0.2205,
+ "step": 4015
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010581169061885185,
+ "loss": 0.2169,
+ "step": 4016
+ },
+ {
+ "epoch": 4.76,
+ "learning_rate": 0.00010577399450010062,
+ "loss": 0.1986,
+ "step": 4017
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.0001057362975581037,
+ "loss": 0.2011,
+ "step": 4018
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010569859979823586,
+ "loss": 0.2208,
+ "step": 4019
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.000105660901225872,
+ "loss": 0.2478,
+ "step": 4020
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010562320184638714,
+ "loss": 0.1936,
+ "step": 4021
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010558550166515633,
+ "loss": 0.2719,
+ "step": 4022
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010554780068755483,
+ "loss": 0.2873,
+ "step": 4023
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010551009891895796,
+ "loss": 0.1993,
+ "step": 4024
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 0.00010547239636474115,
+ "loss": 0.2174,
+ "step": 4025
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010543469303028002,
+ "loss": 0.2009,
+ "step": 4026
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010539698892095021,
+ "loss": 0.2038,
+ "step": 4027
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.0001053592840421275,
+ "loss": 0.2119,
+ "step": 4028
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010532157839918779,
+ "loss": 0.242,
+ "step": 4029
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010528387199750707,
+ "loss": 0.2026,
+ "step": 4030
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010524616484246146,
+ "loss": 0.2445,
+ "step": 4031
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010520845693942719,
+ "loss": 0.2793,
+ "step": 4032
+ },
+ {
+ "epoch": 4.78,
+ "learning_rate": 0.00010517074829378057,
+ "loss": 0.2658,
+ "step": 4033
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010513303891089803,
+ "loss": 0.2069,
+ "step": 4034
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010509532879615614,
+ "loss": 0.2211,
+ "step": 4035
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010505761795493145,
+ "loss": 0.2078,
+ "step": 4036
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010501990639260079,
+ "loss": 0.2796,
+ "step": 4037
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010498219411454098,
+ "loss": 0.2201,
+ "step": 4038
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.000104944481126129,
+ "loss": 0.198,
+ "step": 4039
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010490676743274181,
+ "loss": 0.2182,
+ "step": 4040
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010486905303975664,
+ "loss": 0.216,
+ "step": 4041
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 0.00010483133795255071,
+ "loss": 0.2365,
+ "step": 4042
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010479362217650137,
+ "loss": 0.2472,
+ "step": 4043
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.0001047559057169861,
+ "loss": 0.2259,
+ "step": 4044
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010471818857938238,
+ "loss": 0.2306,
+ "step": 4045
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010468047076906793,
+ "loss": 0.2689,
+ "step": 4046
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.0001046427522914204,
+ "loss": 0.2361,
+ "step": 4047
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010460503315181768,
+ "loss": 0.2919,
+ "step": 4048
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010456731335563769,
+ "loss": 0.2397,
+ "step": 4049
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 0.00010452959290825846,
+ "loss": 0.2144,
+ "step": 4050
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010449187181505804,
+ "loss": 0.258,
+ "step": 4051
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010445415008141473,
+ "loss": 0.2199,
+ "step": 4052
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010441642771270675,
+ "loss": 0.1817,
+ "step": 4053
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010437870471431251,
+ "loss": 0.2089,
+ "step": 4054
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010434098109161051,
+ "loss": 0.2047,
+ "step": 4055
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010430325684997928,
+ "loss": 0.2067,
+ "step": 4056
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.00010426553199479749,
+ "loss": 0.1996,
+ "step": 4057
+ },
+ {
+ "epoch": 4.81,
+ "learning_rate": 0.0001042278065314439,
+ "loss": 0.2205,
+ "step": 4058
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.0001041900804652973,
+ "loss": 0.2508,
+ "step": 4059
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010415235380173662,
+ "loss": 0.2562,
+ "step": 4060
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010411462654614088,
+ "loss": 0.2199,
+ "step": 4061
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010407689870388916,
+ "loss": 0.2718,
+ "step": 4062
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010403917028036058,
+ "loss": 0.2292,
+ "step": 4063
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010400144128093448,
+ "loss": 0.3123,
+ "step": 4064
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010396371171099006,
+ "loss": 0.2814,
+ "step": 4065
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010392598157590688,
+ "loss": 0.231,
+ "step": 4066
+ },
+ {
+ "epoch": 4.82,
+ "learning_rate": 0.00010388825088106433,
+ "loss": 0.2242,
+ "step": 4067
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010385051963184202,
+ "loss": 0.1998,
+ "step": 4068
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.0001038127878336196,
+ "loss": 0.1902,
+ "step": 4069
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010377505549177682,
+ "loss": 0.2198,
+ "step": 4070
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010373732261169346,
+ "loss": 0.2537,
+ "step": 4071
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010369958919874943,
+ "loss": 0.2267,
+ "step": 4072
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010366185525832467,
+ "loss": 0.2376,
+ "step": 4073
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010362412079579924,
+ "loss": 0.2076,
+ "step": 4074
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 0.00010358638581655322,
+ "loss": 0.2507,
+ "step": 4075
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010354865032596682,
+ "loss": 0.2077,
+ "step": 4076
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010351091432942029,
+ "loss": 0.2762,
+ "step": 4077
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010347317783229398,
+ "loss": 0.2232,
+ "step": 4078
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010343544083996824,
+ "loss": 0.2475,
+ "step": 4079
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010339770335782359,
+ "loss": 0.2108,
+ "step": 4080
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010335996539124055,
+ "loss": 0.2544,
+ "step": 4081
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010332222694559975,
+ "loss": 0.2253,
+ "step": 4082
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 0.00010328448802628183,
+ "loss": 0.2324,
+ "step": 4083
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010324674863866759,
+ "loss": 0.287,
+ "step": 4084
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.0001032090087881378,
+ "loss": 0.3515,
+ "step": 4085
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010317126848007337,
+ "loss": 0.2242,
+ "step": 4086
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.0001031335277198552,
+ "loss": 0.2242,
+ "step": 4087
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010309578651286436,
+ "loss": 0.1879,
+ "step": 4088
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010305804486448186,
+ "loss": 0.2261,
+ "step": 4089
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.0001030203027800889,
+ "loss": 0.2415,
+ "step": 4090
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010298256026506662,
+ "loss": 0.2141,
+ "step": 4091
+ },
+ {
+ "epoch": 4.85,
+ "learning_rate": 0.00010294481732479635,
+ "loss": 0.2015,
+ "step": 4092
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.0001029070739646593,
+ "loss": 0.206,
+ "step": 4093
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010286933019003697,
+ "loss": 0.2598,
+ "step": 4094
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010283158600631072,
+ "loss": 0.2561,
+ "step": 4095
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010279384141886208,
+ "loss": 0.1914,
+ "step": 4096
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010275609643307258,
+ "loss": 0.2416,
+ "step": 4097
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010271835105432388,
+ "loss": 0.2012,
+ "step": 4098
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010268060528799754,
+ "loss": 0.3043,
+ "step": 4099
+ },
+ {
+ "epoch": 4.86,
+ "learning_rate": 0.00010264285913947545,
+ "loss": 0.2331,
+ "step": 4100
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010260511261413923,
+ "loss": 0.237,
+ "step": 4101
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010256736571737083,
+ "loss": 0.2776,
+ "step": 4102
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010252961845455205,
+ "loss": 0.1938,
+ "step": 4103
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010249187083106486,
+ "loss": 0.2596,
+ "step": 4104
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010245412285229124,
+ "loss": 0.196,
+ "step": 4105
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010241637452361323,
+ "loss": 0.2369,
+ "step": 4106
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010237862585041293,
+ "loss": 0.2091,
+ "step": 4107
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.00010234087683807247,
+ "loss": 0.2273,
+ "step": 4108
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010230312749197406,
+ "loss": 0.3996,
+ "step": 4109
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010226537781749987,
+ "loss": 0.2382,
+ "step": 4110
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010222762782003223,
+ "loss": 0.2174,
+ "step": 4111
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010218987750495343,
+ "loss": 0.2569,
+ "step": 4112
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010215212687764593,
+ "loss": 0.2239,
+ "step": 4113
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010211437594349203,
+ "loss": 0.3192,
+ "step": 4114
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010207662470787427,
+ "loss": 0.2347,
+ "step": 4115
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010203887317617511,
+ "loss": 0.2461,
+ "step": 4116
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 0.00010200112135377709,
+ "loss": 0.2826,
+ "step": 4117
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010196336924606283,
+ "loss": 0.3531,
+ "step": 4118
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010192561685841496,
+ "loss": 0.2104,
+ "step": 4119
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010188786419621612,
+ "loss": 0.2257,
+ "step": 4120
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010185011126484903,
+ "loss": 0.2096,
+ "step": 4121
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.0001018123580696964,
+ "loss": 0.2009,
+ "step": 4122
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010177460461614108,
+ "loss": 0.3198,
+ "step": 4123
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010173685090956582,
+ "loss": 0.1979,
+ "step": 4124
+ },
+ {
+ "epoch": 4.89,
+ "learning_rate": 0.00010169909695535354,
+ "loss": 0.2507,
+ "step": 4125
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010166134275888708,
+ "loss": 0.2295,
+ "step": 4126
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010162358832554937,
+ "loss": 0.2355,
+ "step": 4127
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010158583366072338,
+ "loss": 0.2253,
+ "step": 4128
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010154807876979213,
+ "loss": 0.3306,
+ "step": 4129
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010151032365813859,
+ "loss": 0.2265,
+ "step": 4130
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010147256833114586,
+ "loss": 0.2176,
+ "step": 4131
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.000101434812794197,
+ "loss": 0.2631,
+ "step": 4132
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 0.00010139705705267513,
+ "loss": 0.2241,
+ "step": 4133
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010135930111196338,
+ "loss": 0.2317,
+ "step": 4134
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.000101321544977445,
+ "loss": 0.2325,
+ "step": 4135
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010128378865450307,
+ "loss": 0.2011,
+ "step": 4136
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010124603214852093,
+ "loss": 0.232,
+ "step": 4137
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010120827546488174,
+ "loss": 0.2624,
+ "step": 4138
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010117051860896885,
+ "loss": 0.2452,
+ "step": 4139
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010113276158616553,
+ "loss": 0.2261,
+ "step": 4140
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.00010109500440185514,
+ "loss": 0.2378,
+ "step": 4141
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.000101057247061421,
+ "loss": 0.2172,
+ "step": 4142
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010101948957024647,
+ "loss": 0.2539,
+ "step": 4143
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010098173193371499,
+ "loss": 0.2178,
+ "step": 4144
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010094397415720991,
+ "loss": 0.2545,
+ "step": 4145
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010090621624611474,
+ "loss": 0.2233,
+ "step": 4146
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.0001008684582058129,
+ "loss": 0.2547,
+ "step": 4147
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010083070004168786,
+ "loss": 0.232,
+ "step": 4148
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010079294175912313,
+ "loss": 0.313,
+ "step": 4149
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 0.00010075518336350218,
+ "loss": 0.2234,
+ "step": 4150
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010071742486020854,
+ "loss": 0.2447,
+ "step": 4151
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010067966625462577,
+ "loss": 0.246,
+ "step": 4152
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010064190755213745,
+ "loss": 0.1836,
+ "step": 4153
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010060414875812709,
+ "loss": 0.2655,
+ "step": 4154
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010056638987797833,
+ "loss": 0.2338,
+ "step": 4155
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010052863091707467,
+ "loss": 0.2014,
+ "step": 4156
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.00010049087188079983,
+ "loss": 0.2492,
+ "step": 4157
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.0001004531127745373,
+ "loss": 0.2547,
+ "step": 4158
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010041535360367085,
+ "loss": 0.2837,
+ "step": 4159
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010037759437358398,
+ "loss": 0.2598,
+ "step": 4160
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.0001003398350896604,
+ "loss": 0.2047,
+ "step": 4161
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010030207575728374,
+ "loss": 0.2006,
+ "step": 4162
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010026431638183771,
+ "loss": 0.2399,
+ "step": 4163
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010022655696870588,
+ "loss": 0.2508,
+ "step": 4164
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010018879752327202,
+ "loss": 0.2217,
+ "step": 4165
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010015103805091973,
+ "loss": 0.2649,
+ "step": 4166
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.00010011327855703267,
+ "loss": 0.2819,
+ "step": 4167
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 0.00010007551904699459,
+ "loss": 0.2435,
+ "step": 4168
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 0.00010003775952618914,
+ "loss": 0.2641,
+ "step": 4169
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 0.0001,
+ "loss": 0.2157,
+ "step": 4170
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 9.996224047381087e-05,
+ "loss": 0.2414,
+ "step": 4171
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 9.992448095300542e-05,
+ "loss": 0.1886,
+ "step": 4172
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 9.988672144296735e-05,
+ "loss": 0.2392,
+ "step": 4173
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 9.984896194908031e-05,
+ "loss": 0.2231,
+ "step": 4174
+ },
+ {
+ "epoch": 4.95,
+ "learning_rate": 9.981120247672801e-05,
+ "loss": 0.2024,
+ "step": 4175
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.97734430312941e-05,
+ "loss": 0.2104,
+ "step": 4176
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.973568361816233e-05,
+ "loss": 0.2793,
+ "step": 4177
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.969792424271627e-05,
+ "loss": 0.2292,
+ "step": 4178
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.966016491033962e-05,
+ "loss": 0.2046,
+ "step": 4179
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.962240562641602e-05,
+ "loss": 0.2099,
+ "step": 4180
+ },
+ {
+ "epoch": 4.96,
+ "eval_loss": 3.49141788482666,
+ "eval_runtime": 284.0307,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 4180
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.95846463963292e-05,
+ "loss": 0.2376,
+ "step": 4181
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.95468872254627e-05,
+ "loss": 0.21,
+ "step": 4182
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 9.950912811920021e-05,
+ "loss": 0.2079,
+ "step": 4183
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.947136908292534e-05,
+ "loss": 0.2151,
+ "step": 4184
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.943361012202172e-05,
+ "loss": 0.2247,
+ "step": 4185
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.939585124187292e-05,
+ "loss": 0.2318,
+ "step": 4186
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.935809244786256e-05,
+ "loss": 0.2414,
+ "step": 4187
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.932033374537422e-05,
+ "loss": 0.1861,
+ "step": 4188
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.92825751397915e-05,
+ "loss": 0.2475,
+ "step": 4189
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.924481663649785e-05,
+ "loss": 0.2163,
+ "step": 4190
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.92070582408769e-05,
+ "loss": 0.2044,
+ "step": 4191
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 9.916929995831215e-05,
+ "loss": 0.2369,
+ "step": 4192
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.913154179418713e-05,
+ "loss": 0.2553,
+ "step": 4193
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.909378375388529e-05,
+ "loss": 0.3004,
+ "step": 4194
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.90560258427901e-05,
+ "loss": 0.2313,
+ "step": 4195
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.901826806628505e-05,
+ "loss": 0.2134,
+ "step": 4196
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.898051042975358e-05,
+ "loss": 0.2175,
+ "step": 4197
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.894275293857904e-05,
+ "loss": 0.1946,
+ "step": 4198
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.890499559814487e-05,
+ "loss": 0.2395,
+ "step": 4199
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 9.886723841383448e-05,
+ "loss": 0.215,
+ "step": 4200
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.882948139103118e-05,
+ "loss": 0.2206,
+ "step": 4201
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.879172453511827e-05,
+ "loss": 0.3013,
+ "step": 4202
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.875396785147909e-05,
+ "loss": 0.2874,
+ "step": 4203
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.871621134549692e-05,
+ "loss": 0.2445,
+ "step": 4204
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.867845502255506e-05,
+ "loss": 0.2294,
+ "step": 4205
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.864069888803663e-05,
+ "loss": 0.2251,
+ "step": 4206
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.860294294732489e-05,
+ "loss": 0.2164,
+ "step": 4207
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 9.856518720580303e-05,
+ "loss": 0.2441,
+ "step": 4208
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.852743166885417e-05,
+ "loss": 0.1956,
+ "step": 4209
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.848967634186142e-05,
+ "loss": 0.2312,
+ "step": 4210
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.845192123020789e-05,
+ "loss": 0.2617,
+ "step": 4211
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.841416633927662e-05,
+ "loss": 0.2207,
+ "step": 4212
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.837641167445065e-05,
+ "loss": 0.1902,
+ "step": 4213
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.833865724111295e-05,
+ "loss": 0.2028,
+ "step": 4214
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.830090304464647e-05,
+ "loss": 0.2526,
+ "step": 4215
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.826314909043418e-05,
+ "loss": 0.2181,
+ "step": 4216
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.822539538385897e-05,
+ "loss": 0.2086,
+ "step": 4217
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.818764193030363e-05,
+ "loss": 0.252,
+ "step": 4218
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.8149888735151e-05,
+ "loss": 0.248,
+ "step": 4219
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.81121358037839e-05,
+ "loss": 0.1881,
+ "step": 4220
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.807438314158508e-05,
+ "loss": 0.244,
+ "step": 4221
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.803663075393718e-05,
+ "loss": 0.2777,
+ "step": 4222
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.799887864622292e-05,
+ "loss": 0.2263,
+ "step": 4223
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.796112682382493e-05,
+ "loss": 0.2016,
+ "step": 4224
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.792337529212578e-05,
+ "loss": 0.212,
+ "step": 4225
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.7885624056508e-05,
+ "loss": 0.2941,
+ "step": 4226
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.784787312235411e-05,
+ "loss": 0.2384,
+ "step": 4227
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.781012249504655e-05,
+ "loss": 0.1963,
+ "step": 4228
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.777237217996779e-05,
+ "loss": 0.1817,
+ "step": 4229
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.773462218250015e-05,
+ "loss": 0.1794,
+ "step": 4230
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 9.769687250802597e-05,
+ "loss": 0.1783,
+ "step": 4231
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.765912316192752e-05,
+ "loss": 0.2024,
+ "step": 4232
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.76213741495871e-05,
+ "loss": 0.1771,
+ "step": 4233
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.75836254763868e-05,
+ "loss": 0.1834,
+ "step": 4234
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.754587714770878e-05,
+ "loss": 0.1689,
+ "step": 4235
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.750812916893517e-05,
+ "loss": 0.1893,
+ "step": 4236
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.747038154544795e-05,
+ "loss": 0.1801,
+ "step": 4237
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.743263428262921e-05,
+ "loss": 0.1973,
+ "step": 4238
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 9.739488738586078e-05,
+ "loss": 0.1716,
+ "step": 4239
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.735714086052458e-05,
+ "loss": 0.1803,
+ "step": 4240
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.731939471200244e-05,
+ "loss": 0.199,
+ "step": 4241
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.728164894567617e-05,
+ "loss": 0.1682,
+ "step": 4242
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.724390356692745e-05,
+ "loss": 0.2256,
+ "step": 4243
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.720615858113794e-05,
+ "loss": 0.1715,
+ "step": 4244
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.71684139936893e-05,
+ "loss": 0.1864,
+ "step": 4245
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.713066980996308e-05,
+ "loss": 0.1682,
+ "step": 4246
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 9.709292603534072e-05,
+ "loss": 0.1824,
+ "step": 4247
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.705518267520368e-05,
+ "loss": 0.1741,
+ "step": 4248
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.701743973493337e-05,
+ "loss": 0.1716,
+ "step": 4249
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.697969721991114e-05,
+ "loss": 0.1889,
+ "step": 4250
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.694195513551815e-05,
+ "loss": 0.1823,
+ "step": 4251
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.690421348713568e-05,
+ "loss": 0.186,
+ "step": 4252
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.686647228014482e-05,
+ "loss": 0.1867,
+ "step": 4253
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.682873151992668e-05,
+ "loss": 0.173,
+ "step": 4254
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.679099121186222e-05,
+ "loss": 0.1878,
+ "step": 4255
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 9.675325136133244e-05,
+ "loss": 0.1812,
+ "step": 4256
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.671551197371818e-05,
+ "loss": 0.1883,
+ "step": 4257
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.667777305440029e-05,
+ "loss": 0.1687,
+ "step": 4258
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.664003460875947e-05,
+ "loss": 0.1788,
+ "step": 4259
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.660229664217642e-05,
+ "loss": 0.1695,
+ "step": 4260
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.656455916003176e-05,
+ "loss": 0.1779,
+ "step": 4261
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.652682216770607e-05,
+ "loss": 0.1718,
+ "step": 4262
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.648908567057974e-05,
+ "loss": 0.1728,
+ "step": 4263
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 9.645134967403319e-05,
+ "loss": 0.2087,
+ "step": 4264
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.64136141834468e-05,
+ "loss": 0.1742,
+ "step": 4265
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.63758792042008e-05,
+ "loss": 0.1867,
+ "step": 4266
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.633814474167535e-05,
+ "loss": 0.1854,
+ "step": 4267
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.630041080125058e-05,
+ "loss": 0.184,
+ "step": 4268
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.626267738830655e-05,
+ "loss": 0.1796,
+ "step": 4269
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.62249445082232e-05,
+ "loss": 0.1899,
+ "step": 4270
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.61872121663804e-05,
+ "loss": 0.1717,
+ "step": 4271
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 9.614948036815799e-05,
+ "loss": 0.1754,
+ "step": 4272
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.611174911893568e-05,
+ "loss": 0.1796,
+ "step": 4273
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.607401842409317e-05,
+ "loss": 0.1737,
+ "step": 4274
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.603628828900996e-05,
+ "loss": 0.1813,
+ "step": 4275
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.599855871906555e-05,
+ "loss": 0.1819,
+ "step": 4276
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.59608297196394e-05,
+ "loss": 0.1775,
+ "step": 4277
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.592310129611089e-05,
+ "loss": 0.1786,
+ "step": 4278
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.588537345385914e-05,
+ "loss": 0.1757,
+ "step": 4279
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.584764619826339e-05,
+ "loss": 0.1768,
+ "step": 4280
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 9.580991953470271e-05,
+ "loss": 0.1705,
+ "step": 4281
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.577219346855613e-05,
+ "loss": 0.1822,
+ "step": 4282
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.573446800520253e-05,
+ "loss": 0.1817,
+ "step": 4283
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.569674315002074e-05,
+ "loss": 0.1814,
+ "step": 4284
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.56590189083895e-05,
+ "loss": 0.1789,
+ "step": 4285
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.562129528568753e-05,
+ "loss": 0.174,
+ "step": 4286
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.558357228729327e-05,
+ "loss": 0.1769,
+ "step": 4287
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.554584991858528e-05,
+ "loss": 0.1748,
+ "step": 4288
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 9.550812818494194e-05,
+ "loss": 0.1752,
+ "step": 4289
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.547040709174159e-05,
+ "loss": 0.171,
+ "step": 4290
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.543268664436233e-05,
+ "loss": 0.1822,
+ "step": 4291
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.539496684818233e-05,
+ "loss": 0.1746,
+ "step": 4292
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.535724770857962e-05,
+ "loss": 0.1729,
+ "step": 4293
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.531952923093211e-05,
+ "loss": 0.1888,
+ "step": 4294
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.528181142061763e-05,
+ "loss": 0.1786,
+ "step": 4295
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.524409428301392e-05,
+ "loss": 0.202,
+ "step": 4296
+ },
+ {
+ "epoch": 5.08,
+ "learning_rate": 9.520637782349863e-05,
+ "loss": 0.1846,
+ "step": 4297
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.516866204744931e-05,
+ "loss": 0.1754,
+ "step": 4298
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.513094696024338e-05,
+ "loss": 0.178,
+ "step": 4299
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.509323256725821e-05,
+ "loss": 0.1744,
+ "step": 4300
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.505551887387102e-05,
+ "loss": 0.1791,
+ "step": 4301
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.501780588545901e-05,
+ "loss": 0.1831,
+ "step": 4302
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.498009360739925e-05,
+ "loss": 0.1786,
+ "step": 4303
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.494238204506858e-05,
+ "loss": 0.1861,
+ "step": 4304
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.490467120384389e-05,
+ "loss": 0.1823,
+ "step": 4305
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 9.486696108910198e-05,
+ "loss": 0.1811,
+ "step": 4306
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.482925170621946e-05,
+ "loss": 0.183,
+ "step": 4307
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.479154306057284e-05,
+ "loss": 0.1857,
+ "step": 4308
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.475383515753856e-05,
+ "loss": 0.1701,
+ "step": 4309
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.471612800249296e-05,
+ "loss": 0.181,
+ "step": 4310
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.467842160081225e-05,
+ "loss": 0.1735,
+ "step": 4311
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.464071595787253e-05,
+ "loss": 0.1959,
+ "step": 4312
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.46030110790498e-05,
+ "loss": 0.1854,
+ "step": 4313
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 9.456530696971999e-05,
+ "loss": 0.178,
+ "step": 4314
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.452760363525887e-05,
+ "loss": 0.1821,
+ "step": 4315
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.448990108104208e-05,
+ "loss": 0.1774,
+ "step": 4316
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.44521993124452e-05,
+ "loss": 0.1773,
+ "step": 4317
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.441449833484368e-05,
+ "loss": 0.1697,
+ "step": 4318
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.437679815361291e-05,
+ "loss": 0.1658,
+ "step": 4319
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.433909877412802e-05,
+ "loss": 0.1817,
+ "step": 4320
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.430140020176416e-05,
+ "loss": 0.1695,
+ "step": 4321
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 9.426370244189632e-05,
+ "loss": 0.1691,
+ "step": 4322
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.422600549989942e-05,
+ "loss": 0.1717,
+ "step": 4323
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.418830938114816e-05,
+ "loss": 0.1788,
+ "step": 4324
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.415061409101721e-05,
+ "loss": 0.1773,
+ "step": 4325
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.411291963488109e-05,
+ "loss": 0.1791,
+ "step": 4326
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.407522601811425e-05,
+ "loss": 0.172,
+ "step": 4327
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.403753324609091e-05,
+ "loss": 0.1713,
+ "step": 4328
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.399984132418528e-05,
+ "loss": 0.1958,
+ "step": 4329
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.396215025777139e-05,
+ "loss": 0.1701,
+ "step": 4330
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 9.392446005222321e-05,
+ "loss": 0.1865,
+ "step": 4331
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.388677071291446e-05,
+ "loss": 0.1773,
+ "step": 4332
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.384908224521886e-05,
+ "loss": 0.168,
+ "step": 4333
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.381139465450993e-05,
+ "loss": 0.1963,
+ "step": 4334
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.37737079461612e-05,
+ "loss": 0.1697,
+ "step": 4335
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.373602212554586e-05,
+ "loss": 0.1748,
+ "step": 4336
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.369833719803712e-05,
+ "loss": 0.1669,
+ "step": 4337
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.366065316900805e-05,
+ "loss": 0.1726,
+ "step": 4338
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 9.362297004383158e-05,
+ "loss": 0.1778,
+ "step": 4339
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.358528782788045e-05,
+ "loss": 0.1782,
+ "step": 4340
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.354760652652734e-05,
+ "loss": 0.1864,
+ "step": 4341
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.35099261451448e-05,
+ "loss": 0.1761,
+ "step": 4342
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.347224668910528e-05,
+ "loss": 0.1882,
+ "step": 4343
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.343456816378092e-05,
+ "loss": 0.1746,
+ "step": 4344
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.339689057454393e-05,
+ "loss": 0.2131,
+ "step": 4345
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.335921392676631e-05,
+ "loss": 0.1772,
+ "step": 4346
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 9.332153822582e-05,
+ "loss": 0.1787,
+ "step": 4347
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.32838634770766e-05,
+ "loss": 0.1713,
+ "step": 4348
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.324618968590776e-05,
+ "loss": 0.1885,
+ "step": 4349
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.320851685768497e-05,
+ "loss": 0.1783,
+ "step": 4350
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.317084499777956e-05,
+ "loss": 0.1833,
+ "step": 4351
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.313317411156264e-05,
+ "loss": 0.1849,
+ "step": 4352
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.309550420440532e-05,
+ "loss": 0.1753,
+ "step": 4353
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.305783528167849e-05,
+ "loss": 0.1873,
+ "step": 4354
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.302016734875292e-05,
+ "loss": 0.1798,
+ "step": 4355
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 9.298250041099924e-05,
+ "loss": 0.1735,
+ "step": 4356
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.294483447378792e-05,
+ "loss": 0.1766,
+ "step": 4357
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.29071695424893e-05,
+ "loss": 0.1683,
+ "step": 4358
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.286950562247365e-05,
+ "loss": 0.1815,
+ "step": 4359
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.283184271911089e-05,
+ "loss": 0.1736,
+ "step": 4360
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.279418083777103e-05,
+ "loss": 0.198,
+ "step": 4361
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.275651998382377e-05,
+ "loss": 0.1734,
+ "step": 4362
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.27188601626388e-05,
+ "loss": 0.1974,
+ "step": 4363
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 9.268120137958551e-05,
+ "loss": 0.1813,
+ "step": 4364
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.264354364003327e-05,
+ "loss": 0.1823,
+ "step": 4365
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.260588694935124e-05,
+ "loss": 0.1723,
+ "step": 4366
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.256823131290844e-05,
+ "loss": 0.2007,
+ "step": 4367
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.253057673607376e-05,
+ "loss": 0.1844,
+ "step": 4368
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.249292322421589e-05,
+ "loss": 0.1773,
+ "step": 4369
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.245527078270341e-05,
+ "loss": 0.1707,
+ "step": 4370
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.241761941690474e-05,
+ "loss": 0.1769,
+ "step": 4371
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 9.237996913218819e-05,
+ "loss": 0.1741,
+ "step": 4372
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.234231993392177e-05,
+ "loss": 0.1807,
+ "step": 4373
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.230467182747351e-05,
+ "loss": 0.1746,
+ "step": 4374
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.226702481821118e-05,
+ "loss": 0.1794,
+ "step": 4375
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.222937891150249e-05,
+ "loss": 0.1798,
+ "step": 4376
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.21917341127148e-05,
+ "loss": 0.1682,
+ "step": 4377
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.215409042721552e-05,
+ "loss": 0.196,
+ "step": 4378
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.211644786037179e-05,
+ "loss": 0.1723,
+ "step": 4379
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.207880641755065e-05,
+ "loss": 0.1698,
+ "step": 4380
+ },
+ {
+ "epoch": 5.18,
+ "learning_rate": 9.204116610411893e-05,
+ "loss": 0.1717,
+ "step": 4381
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.20035269254433e-05,
+ "loss": 0.1823,
+ "step": 4382
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.19658888868903e-05,
+ "loss": 0.1758,
+ "step": 4383
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.192825199382632e-05,
+ "loss": 0.1773,
+ "step": 4384
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.189061625161751e-05,
+ "loss": 0.1777,
+ "step": 4385
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.185298166562994e-05,
+ "loss": 0.1751,
+ "step": 4386
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.181534824122947e-05,
+ "loss": 0.1813,
+ "step": 4387
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.177771598378185e-05,
+ "loss": 0.1863,
+ "step": 4388
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 9.174008489865253e-05,
+ "loss": 0.1933,
+ "step": 4389
+ },
+ {
+ "epoch": 5.19,
+ "eval_loss": 3.8528122901916504,
+ "eval_runtime": 284.0506,
+ "eval_samples_per_second": 0.725,
+ "eval_steps_per_second": 0.725,
+ "step": 4389
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.170245499120693e-05,
+ "loss": 0.1716,
+ "step": 4390
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.166482626681024e-05,
+ "loss": 0.1875,
+ "step": 4391
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.162719873082757e-05,
+ "loss": 0.1881,
+ "step": 4392
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.158957238862367e-05,
+ "loss": 0.1838,
+ "step": 4393
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.155194724556331e-05,
+ "loss": 0.1724,
+ "step": 4394
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.151432330701097e-05,
+ "loss": 0.1859,
+ "step": 4395
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.147670057833107e-05,
+ "loss": 0.1739,
+ "step": 4396
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 9.143907906488772e-05,
+ "loss": 0.1885,
+ "step": 4397
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.140145877204496e-05,
+ "loss": 0.1828,
+ "step": 4398
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.13638397051666e-05,
+ "loss": 0.1937,
+ "step": 4399
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.132622186961637e-05,
+ "loss": 0.1731,
+ "step": 4400
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.128860527075767e-05,
+ "loss": 0.1721,
+ "step": 4401
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.125098991395378e-05,
+ "loss": 0.1724,
+ "step": 4402
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.121337580456793e-05,
+ "loss": 0.18,
+ "step": 4403
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.117576294796307e-05,
+ "loss": 0.1731,
+ "step": 4404
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.11381513495019e-05,
+ "loss": 0.1728,
+ "step": 4405
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 9.110054101454701e-05,
+ "loss": 0.1787,
+ "step": 4406
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.106293194846087e-05,
+ "loss": 0.1748,
+ "step": 4407
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.102532415660571e-05,
+ "loss": 0.177,
+ "step": 4408
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.098771764434353e-05,
+ "loss": 0.1896,
+ "step": 4409
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.095011241703623e-05,
+ "loss": 0.1702,
+ "step": 4410
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.091250848004549e-05,
+ "loss": 0.18,
+ "step": 4411
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.087490583873284e-05,
+ "loss": 0.1788,
+ "step": 4412
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.083730449845954e-05,
+ "loss": 0.1782,
+ "step": 4413
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 9.079970446458677e-05,
+ "loss": 0.1799,
+ "step": 4414
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.076210574247543e-05,
+ "loss": 0.1827,
+ "step": 4415
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.072450833748637e-05,
+ "loss": 0.1981,
+ "step": 4416
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.068691225498004e-05,
+ "loss": 0.1687,
+ "step": 4417
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.064931750031688e-05,
+ "loss": 0.1759,
+ "step": 4418
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.061172407885707e-05,
+ "loss": 0.1934,
+ "step": 4419
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.057413199596065e-05,
+ "loss": 0.1791,
+ "step": 4420
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.053654125698738e-05,
+ "loss": 0.1718,
+ "step": 4421
+ },
+ {
+ "epoch": 5.23,
+ "learning_rate": 9.049895186729688e-05,
+ "loss": 0.178,
+ "step": 4422
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.046136383224862e-05,
+ "loss": 0.1848,
+ "step": 4423
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.042377715720182e-05,
+ "loss": 0.1726,
+ "step": 4424
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.038619184751549e-05,
+ "loss": 0.1924,
+ "step": 4425
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.034860790854849e-05,
+ "loss": 0.1786,
+ "step": 4426
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.031102534565949e-05,
+ "loss": 0.1899,
+ "step": 4427
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.027344416420695e-05,
+ "loss": 0.1987,
+ "step": 4428
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.023586436954909e-05,
+ "loss": 0.2179,
+ "step": 4429
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.019828596704394e-05,
+ "loss": 0.1737,
+ "step": 4430
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 9.016070896204943e-05,
+ "loss": 0.1978,
+ "step": 4431
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 9.01231333599232e-05,
+ "loss": 0.1794,
+ "step": 4432
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 9.008555916602276e-05,
+ "loss": 0.1758,
+ "step": 4433
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 9.004798638570527e-05,
+ "loss": 0.1792,
+ "step": 4434
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 9.001041502432783e-05,
+ "loss": 0.1782,
+ "step": 4435
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 8.99728450872473e-05,
+ "loss": 0.186,
+ "step": 4436
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 8.993527657982036e-05,
+ "loss": 0.1911,
+ "step": 4437
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 8.989770950740344e-05,
+ "loss": 0.1758,
+ "step": 4438
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 8.986014387535275e-05,
+ "loss": 0.1783,
+ "step": 4439
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.982257968902438e-05,
+ "loss": 0.1789,
+ "step": 4440
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.978501695377415e-05,
+ "loss": 0.1784,
+ "step": 4441
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.974745567495768e-05,
+ "loss": 0.184,
+ "step": 4442
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.970989585793039e-05,
+ "loss": 0.1774,
+ "step": 4443
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.967233750804747e-05,
+ "loss": 0.1653,
+ "step": 4444
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.963478063066402e-05,
+ "loss": 0.1782,
+ "step": 4445
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.959722523113469e-05,
+ "loss": 0.1687,
+ "step": 4446
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 8.955967131481412e-05,
+ "loss": 0.1822,
+ "step": 4447
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.952211888705668e-05,
+ "loss": 0.1862,
+ "step": 4448
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.948456795321657e-05,
+ "loss": 0.1959,
+ "step": 4449
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.944701851864767e-05,
+ "loss": 0.1898,
+ "step": 4450
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.940947058870373e-05,
+ "loss": 0.1803,
+ "step": 4451
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.937192416873828e-05,
+ "loss": 0.1812,
+ "step": 4452
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.933437926410463e-05,
+ "loss": 0.1868,
+ "step": 4453
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.929683588015582e-05,
+ "loss": 0.1829,
+ "step": 4454
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.925929402224475e-05,
+ "loss": 0.1803,
+ "step": 4455
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 8.922175369572407e-05,
+ "loss": 0.1763,
+ "step": 4456
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.918421490594623e-05,
+ "loss": 0.1831,
+ "step": 4457
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.914667765826338e-05,
+ "loss": 0.1721,
+ "step": 4458
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.910914195802754e-05,
+ "loss": 0.1716,
+ "step": 4459
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.907160781059052e-05,
+ "loss": 0.1785,
+ "step": 4460
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.903407522130386e-05,
+ "loss": 0.1745,
+ "step": 4461
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.899654419551886e-05,
+ "loss": 0.1902,
+ "step": 4462
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.895901473858663e-05,
+ "loss": 0.176,
+ "step": 4463
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 8.892148685585805e-05,
+ "loss": 0.18,
+ "step": 4464
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.88839605526838e-05,
+ "loss": 0.1802,
+ "step": 4465
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.88464358344143e-05,
+ "loss": 0.1953,
+ "step": 4466
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.880891270639975e-05,
+ "loss": 0.1848,
+ "step": 4467
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.877139117399014e-05,
+ "loss": 0.2009,
+ "step": 4468
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.873387124253525e-05,
+ "loss": 0.1784,
+ "step": 4469
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.869635291738452e-05,
+ "loss": 0.1758,
+ "step": 4470
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.86588362038873e-05,
+ "loss": 0.1724,
+ "step": 4471
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 8.862132110739266e-05,
+ "loss": 0.1668,
+ "step": 4472
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.858380763324948e-05,
+ "loss": 0.1696,
+ "step": 4473
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.854629578680624e-05,
+ "loss": 0.1691,
+ "step": 4474
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.85087855734114e-05,
+ "loss": 0.1742,
+ "step": 4475
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.847127699841307e-05,
+ "loss": 0.1727,
+ "step": 4476
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.84337700671592e-05,
+ "loss": 0.1749,
+ "step": 4477
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.839626478499738e-05,
+ "loss": 0.1822,
+ "step": 4478
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.835876115727509e-05,
+ "loss": 0.1784,
+ "step": 4479
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.832125918933954e-05,
+ "loss": 0.1907,
+ "step": 4480
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 8.82837588865377e-05,
+ "loss": 0.1825,
+ "step": 4481
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.824626025421626e-05,
+ "loss": 0.1856,
+ "step": 4482
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.82087632977217e-05,
+ "loss": 0.1809,
+ "step": 4483
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.81712680224003e-05,
+ "loss": 0.1737,
+ "step": 4484
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.81337744335981e-05,
+ "loss": 0.186,
+ "step": 4485
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.809628253666079e-05,
+ "loss": 0.1741,
+ "step": 4486
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.805879233693393e-05,
+ "loss": 0.1857,
+ "step": 4487
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.802130383976279e-05,
+ "loss": 0.1741,
+ "step": 4488
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 8.798381705049248e-05,
+ "loss": 0.1881,
+ "step": 4489
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.79463319744677e-05,
+ "loss": 0.1785,
+ "step": 4490
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.790884861703308e-05,
+ "loss": 0.1839,
+ "step": 4491
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.787136698353289e-05,
+ "loss": 0.1728,
+ "step": 4492
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.783388707931122e-05,
+ "loss": 0.1799,
+ "step": 4493
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.779640890971186e-05,
+ "loss": 0.1759,
+ "step": 4494
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.775893248007839e-05,
+ "loss": 0.1855,
+ "step": 4495
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.772145779575413e-05,
+ "loss": 0.1856,
+ "step": 4496
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 8.768398486208215e-05,
+ "loss": 0.1792,
+ "step": 4497
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.764651368440531e-05,
+ "loss": 0.1816,
+ "step": 4498
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.760904426806612e-05,
+ "loss": 0.183,
+ "step": 4499
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.757157661840693e-05,
+ "loss": 0.1811,
+ "step": 4500
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.753411074076982e-05,
+ "loss": 0.1745,
+ "step": 4501
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.749664664049663e-05,
+ "loss": 0.1768,
+ "step": 4502
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.745918432292887e-05,
+ "loss": 0.1762,
+ "step": 4503
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.742172379340785e-05,
+ "loss": 0.1864,
+ "step": 4504
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.738426505727466e-05,
+ "loss": 0.1759,
+ "step": 4505
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 8.73468081198701e-05,
+ "loss": 0.2045,
+ "step": 4506
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.730935298653467e-05,
+ "loss": 0.1714,
+ "step": 4507
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.727189966260869e-05,
+ "loss": 0.1943,
+ "step": 4508
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.723444815343217e-05,
+ "loss": 0.1724,
+ "step": 4509
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.719699846434492e-05,
+ "loss": 0.1679,
+ "step": 4510
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.715955060068638e-05,
+ "loss": 0.1962,
+ "step": 4511
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.712210456779584e-05,
+ "loss": 0.1766,
+ "step": 4512
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.708466037101229e-05,
+ "loss": 0.1864,
+ "step": 4513
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 8.704721801567448e-05,
+ "loss": 0.1746,
+ "step": 4514
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.70097775071208e-05,
+ "loss": 0.1707,
+ "step": 4515
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.69723388506895e-05,
+ "loss": 0.1717,
+ "step": 4516
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.693490205171846e-05,
+ "loss": 0.183,
+ "step": 4517
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.689746711554548e-05,
+ "loss": 0.1812,
+ "step": 4518
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.686003404750785e-05,
+ "loss": 0.1894,
+ "step": 4519
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.682260285294271e-05,
+ "loss": 0.1726,
+ "step": 4520
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.678517353718698e-05,
+ "loss": 0.1823,
+ "step": 4521
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 8.674774610557728e-05,
+ "loss": 0.1754,
+ "step": 4522
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.671032056344988e-05,
+ "loss": 0.185,
+ "step": 4523
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.667289691614087e-05,
+ "loss": 0.1828,
+ "step": 4524
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.663547516898607e-05,
+ "loss": 0.2032,
+ "step": 4525
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.659805532732103e-05,
+ "loss": 0.1707,
+ "step": 4526
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.656063739648088e-05,
+ "loss": 0.1816,
+ "step": 4527
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.652322138180072e-05,
+ "loss": 0.1761,
+ "step": 4528
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.648580728861521e-05,
+ "loss": 0.1816,
+ "step": 4529
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.644839512225886e-05,
+ "loss": 0.1782,
+ "step": 4530
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 8.64109848880657e-05,
+ "loss": 0.1844,
+ "step": 4531
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.637357659136967e-05,
+ "loss": 0.1915,
+ "step": 4532
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.63361702375044e-05,
+ "loss": 0.1759,
+ "step": 4533
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.629876583180321e-05,
+ "loss": 0.1769,
+ "step": 4534
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.626136337959914e-05,
+ "loss": 0.1888,
+ "step": 4535
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.622396288622497e-05,
+ "loss": 0.1784,
+ "step": 4536
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.618656435701318e-05,
+ "loss": 0.1936,
+ "step": 4537
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.614916779729603e-05,
+ "loss": 0.1701,
+ "step": 4538
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 8.611177321240539e-05,
+ "loss": 0.1861,
+ "step": 4539
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.607438060767296e-05,
+ "loss": 0.1742,
+ "step": 4540
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.603698998843009e-05,
+ "loss": 0.1939,
+ "step": 4541
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.59996013600079e-05,
+ "loss": 0.1835,
+ "step": 4542
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.596221472773714e-05,
+ "loss": 0.1904,
+ "step": 4543
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.592483009694834e-05,
+ "loss": 0.1766,
+ "step": 4544
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.588744747297173e-05,
+ "loss": 0.1765,
+ "step": 4545
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.585006686113733e-05,
+ "loss": 0.1674,
+ "step": 4546
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 8.58126882667747e-05,
+ "loss": 0.1759,
+ "step": 4547
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.577531169521324e-05,
+ "loss": 0.1673,
+ "step": 4548
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.573793715178206e-05,
+ "loss": 0.1788,
+ "step": 4549
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.570056464180998e-05,
+ "loss": 0.1751,
+ "step": 4550
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.566319417062543e-05,
+ "loss": 0.1804,
+ "step": 4551
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.562582574355666e-05,
+ "loss": 0.1896,
+ "step": 4552
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.55884593659316e-05,
+ "loss": 0.1899,
+ "step": 4553
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.55510950430779e-05,
+ "loss": 0.1898,
+ "step": 4554
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.551373278032284e-05,
+ "loss": 0.2023,
+ "step": 4555
+ },
+ {
+ "epoch": 5.39,
+ "learning_rate": 8.547637258299348e-05,
+ "loss": 0.186,
+ "step": 4556
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.54390144564166e-05,
+ "loss": 0.1719,
+ "step": 4557
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.540165840591867e-05,
+ "loss": 0.1735,
+ "step": 4558
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.53643044368258e-05,
+ "loss": 0.1762,
+ "step": 4559
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.532695255446383e-05,
+ "loss": 0.167,
+ "step": 4560
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.52896027641584e-05,
+ "loss": 0.1851,
+ "step": 4561
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.525225507123471e-05,
+ "loss": 0.1975,
+ "step": 4562
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.521490948101777e-05,
+ "loss": 0.1665,
+ "step": 4563
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 8.517756599883224e-05,
+ "loss": 0.1691,
+ "step": 4564
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.514022463000244e-05,
+ "loss": 0.2015,
+ "step": 4565
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.51028853798525e-05,
+ "loss": 0.1806,
+ "step": 4566
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.506554825370615e-05,
+ "loss": 0.1743,
+ "step": 4567
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.502821325688684e-05,
+ "loss": 0.1819,
+ "step": 4568
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.499088039471774e-05,
+ "loss": 0.1787,
+ "step": 4569
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.495354967252169e-05,
+ "loss": 0.1879,
+ "step": 4570
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.49162210956213e-05,
+ "loss": 0.177,
+ "step": 4571
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 8.48788946693387e-05,
+ "loss": 0.1779,
+ "step": 4572
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.48415703989959e-05,
+ "loss": 0.1748,
+ "step": 4573
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.480424828991448e-05,
+ "loss": 0.1807,
+ "step": 4574
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.476692834741585e-05,
+ "loss": 0.1697,
+ "step": 4575
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.472961057682092e-05,
+ "loss": 0.1974,
+ "step": 4576
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.469229498345042e-05,
+ "loss": 0.1773,
+ "step": 4577
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.465498157262474e-05,
+ "loss": 0.1737,
+ "step": 4578
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.4617670349664e-05,
+ "loss": 0.1799,
+ "step": 4579
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.458036131988792e-05,
+ "loss": 0.1753,
+ "step": 4580
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 8.454305448861595e-05,
+ "loss": 0.1718,
+ "step": 4581
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.450574986116724e-05,
+ "loss": 0.1736,
+ "step": 4582
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.446844744286068e-05,
+ "loss": 0.1685,
+ "step": 4583
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.443114723901466e-05,
+ "loss": 0.2072,
+ "step": 4584
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.439384925494743e-05,
+ "loss": 0.5173,
+ "step": 4585
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.435655349597689e-05,
+ "loss": 0.1836,
+ "step": 4586
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.431925996742065e-05,
+ "loss": 0.2294,
+ "step": 4587
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.428196867459585e-05,
+ "loss": 0.1875,
+ "step": 4588
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 8.424467962281945e-05,
+ "loss": 0.1836,
+ "step": 4589
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.420739281740805e-05,
+ "loss": 0.1751,
+ "step": 4590
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.417010826367799e-05,
+ "loss": 0.1763,
+ "step": 4591
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.413282596694516e-05,
+ "loss": 0.179,
+ "step": 4592
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.409554593252523e-05,
+ "loss": 0.1869,
+ "step": 4593
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.405826816573353e-05,
+ "loss": 0.2023,
+ "step": 4594
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.402099267188508e-05,
+ "loss": 0.1727,
+ "step": 4595
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.398371945629448e-05,
+ "loss": 0.1708,
+ "step": 4596
+ },
+ {
+ "epoch": 5.44,
+ "learning_rate": 8.394644852427615e-05,
+ "loss": 0.1766,
+ "step": 4597
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.390917988114406e-05,
+ "loss": 0.1744,
+ "step": 4598
+ },
+ {
+ "epoch": 5.45,
+ "eval_loss": 3.7694623470306396,
+ "eval_runtime": 283.9246,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 4598
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.387191353221198e-05,
+ "loss": 0.186,
+ "step": 4599
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.383464948279319e-05,
+ "loss": 0.2095,
+ "step": 4600
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.379738773820076e-05,
+ "loss": 0.1839,
+ "step": 4601
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.37601283037474e-05,
+ "loss": 0.1666,
+ "step": 4602
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.372287118474553e-05,
+ "loss": 0.1732,
+ "step": 4603
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.368561638650717e-05,
+ "loss": 0.176,
+ "step": 4604
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.364836391434402e-05,
+ "loss": 0.1774,
+ "step": 4605
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 8.361111377356751e-05,
+ "loss": 0.1795,
+ "step": 4606
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.35738659694887e-05,
+ "loss": 0.1814,
+ "step": 4607
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.353662050741827e-05,
+ "loss": 0.176,
+ "step": 4608
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.349937739266665e-05,
+ "loss": 0.1716,
+ "step": 4609
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.346213663054387e-05,
+ "loss": 0.1784,
+ "step": 4610
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.342489822635971e-05,
+ "loss": 0.1961,
+ "step": 4611
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.338766218542347e-05,
+ "loss": 0.1948,
+ "step": 4612
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.335042851304421e-05,
+ "loss": 0.1819,
+ "step": 4613
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 8.33131972145307e-05,
+ "loss": 0.172,
+ "step": 4614
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.327596829519132e-05,
+ "loss": 0.1786,
+ "step": 4615
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.3238741760334e-05,
+ "loss": 0.1888,
+ "step": 4616
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.32015176152665e-05,
+ "loss": 0.1794,
+ "step": 4617
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.316429586529615e-05,
+ "loss": 0.1766,
+ "step": 4618
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.312707651573e-05,
+ "loss": 0.1726,
+ "step": 4619
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.308985957187466e-05,
+ "loss": 0.1708,
+ "step": 4620
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.30526450390365e-05,
+ "loss": 0.204,
+ "step": 4621
+ },
+ {
+ "epoch": 5.47,
+ "learning_rate": 8.301543292252146e-05,
+ "loss": 0.18,
+ "step": 4622
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.297822322763526e-05,
+ "loss": 0.1852,
+ "step": 4623
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.294101595968304e-05,
+ "loss": 0.1956,
+ "step": 4624
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.290381112396987e-05,
+ "loss": 0.1782,
+ "step": 4625
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.286660872580032e-05,
+ "loss": 0.1778,
+ "step": 4626
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.282940877047864e-05,
+ "loss": 0.1855,
+ "step": 4627
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.279221126330874e-05,
+ "loss": 0.176,
+ "step": 4628
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.275501620959414e-05,
+ "loss": 0.1777,
+ "step": 4629
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.271782361463805e-05,
+ "loss": 0.1753,
+ "step": 4630
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 8.268063348374334e-05,
+ "loss": 0.1918,
+ "step": 4631
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.264344582221252e-05,
+ "loss": 0.2044,
+ "step": 4632
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.260626063534772e-05,
+ "loss": 0.1755,
+ "step": 4633
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.256907792845072e-05,
+ "loss": 0.1666,
+ "step": 4634
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.253189770682301e-05,
+ "loss": 0.1709,
+ "step": 4635
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.249471997576565e-05,
+ "loss": 0.1813,
+ "step": 4636
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.245754474057937e-05,
+ "loss": 0.1806,
+ "step": 4637
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.242037200656455e-05,
+ "loss": 0.1723,
+ "step": 4638
+ },
+ {
+ "epoch": 5.49,
+ "learning_rate": 8.238320177902121e-05,
+ "loss": 0.1792,
+ "step": 4639
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.234603406324908e-05,
+ "loss": 0.1744,
+ "step": 4640
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.230886886454734e-05,
+ "loss": 0.1718,
+ "step": 4641
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.227170618821499e-05,
+ "loss": 0.1783,
+ "step": 4642
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.223454603955065e-05,
+ "loss": 0.1656,
+ "step": 4643
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.219738842385256e-05,
+ "loss": 0.1761,
+ "step": 4644
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.216023334641851e-05,
+ "loss": 0.1736,
+ "step": 4645
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.212308081254605e-05,
+ "loss": 0.1811,
+ "step": 4646
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 8.208593082753232e-05,
+ "loss": 0.1806,
+ "step": 4647
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.20487833966741e-05,
+ "loss": 0.1751,
+ "step": 4648
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.201163852526779e-05,
+ "loss": 0.1731,
+ "step": 4649
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.197449621860943e-05,
+ "loss": 0.1744,
+ "step": 4650
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.193735648199472e-05,
+ "loss": 0.1788,
+ "step": 4651
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.1900219320719e-05,
+ "loss": 0.1692,
+ "step": 4652
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.186308474007716e-05,
+ "loss": 0.1795,
+ "step": 4653
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.182595274536383e-05,
+ "loss": 0.1855,
+ "step": 4654
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 8.178882334187319e-05,
+ "loss": 0.177,
+ "step": 4655
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.175169653489915e-05,
+ "loss": 0.1849,
+ "step": 4656
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.171457232973509e-05,
+ "loss": 0.1645,
+ "step": 4657
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.167745073167415e-05,
+ "loss": 0.1802,
+ "step": 4658
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.164033174600905e-05,
+ "loss": 0.2054,
+ "step": 4659
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.16032153780322e-05,
+ "loss": 0.1804,
+ "step": 4660
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.156610163303554e-05,
+ "loss": 0.1704,
+ "step": 4661
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.152899051631065e-05,
+ "loss": 0.1784,
+ "step": 4662
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.149188203314883e-05,
+ "loss": 0.1733,
+ "step": 4663
+ },
+ {
+ "epoch": 5.52,
+ "learning_rate": 8.145477618884093e-05,
+ "loss": 0.1738,
+ "step": 4664
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.141767298867738e-05,
+ "loss": 0.183,
+ "step": 4665
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.138057243794833e-05,
+ "loss": 0.1715,
+ "step": 4666
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.134347454194351e-05,
+ "loss": 0.1915,
+ "step": 4667
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.130637930595231e-05,
+ "loss": 0.1755,
+ "step": 4668
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.12692867352636e-05,
+ "loss": 0.1781,
+ "step": 4669
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.123219683516603e-05,
+ "loss": 0.1676,
+ "step": 4670
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.119510961094778e-05,
+ "loss": 0.1843,
+ "step": 4671
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 8.115802506789679e-05,
+ "loss": 0.1848,
+ "step": 4672
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.112094321130035e-05,
+ "loss": 0.1824,
+ "step": 4673
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.108386404644561e-05,
+ "loss": 0.1844,
+ "step": 4674
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.104678757861925e-05,
+ "loss": 0.1742,
+ "step": 4675
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.100971381310756e-05,
+ "loss": 0.1749,
+ "step": 4676
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.097264275519642e-05,
+ "loss": 0.1754,
+ "step": 4677
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.093557441017139e-05,
+ "loss": 0.1897,
+ "step": 4678
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.089850878331758e-05,
+ "loss": 0.2023,
+ "step": 4679
+ },
+ {
+ "epoch": 5.54,
+ "learning_rate": 8.08614458799198e-05,
+ "loss": 0.1795,
+ "step": 4680
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.082438570526232e-05,
+ "loss": 0.1939,
+ "step": 4681
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.078732826462915e-05,
+ "loss": 0.1919,
+ "step": 4682
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.075027356330391e-05,
+ "loss": 0.1789,
+ "step": 4683
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.07132216065698e-05,
+ "loss": 0.1798,
+ "step": 4684
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.067617239970952e-05,
+ "loss": 0.1861,
+ "step": 4685
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.063912594800556e-05,
+ "loss": 0.1874,
+ "step": 4686
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.060208225673992e-05,
+ "loss": 0.2252,
+ "step": 4687
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.056504133119424e-05,
+ "loss": 0.2101,
+ "step": 4688
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 8.052800317664971e-05,
+ "loss": 0.1923,
+ "step": 4689
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.049096779838719e-05,
+ "loss": 0.1833,
+ "step": 4690
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.04539352016871e-05,
+ "loss": 0.1755,
+ "step": 4691
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.041690539182947e-05,
+ "loss": 0.1772,
+ "step": 4692
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.037987837409402e-05,
+ "loss": 0.1745,
+ "step": 4693
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.03428541537599e-05,
+ "loss": 0.1801,
+ "step": 4694
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.0305832736106e-05,
+ "loss": 0.1726,
+ "step": 4695
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.026881412641073e-05,
+ "loss": 0.179,
+ "step": 4696
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 8.023179832995225e-05,
+ "loss": 0.1757,
+ "step": 4697
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 8.019478535200806e-05,
+ "loss": 0.1813,
+ "step": 4698
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 8.015777519785546e-05,
+ "loss": 0.1899,
+ "step": 4699
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 8.01207678727713e-05,
+ "loss": 0.1797,
+ "step": 4700
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 8.008376338203205e-05,
+ "loss": 0.1836,
+ "step": 4701
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 8.004676173091368e-05,
+ "loss": 0.179,
+ "step": 4702
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 8.000976292469183e-05,
+ "loss": 0.1698,
+ "step": 4703
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 7.997276696864175e-05,
+ "loss": 0.1911,
+ "step": 4704
+ },
+ {
+ "epoch": 5.57,
+ "learning_rate": 7.993577386803827e-05,
+ "loss": 0.1713,
+ "step": 4705
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.989878362815573e-05,
+ "loss": 0.1673,
+ "step": 4706
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.98617962542682e-05,
+ "loss": 0.191,
+ "step": 4707
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.982481175164923e-05,
+ "loss": 0.1736,
+ "step": 4708
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.978783012557207e-05,
+ "loss": 0.1799,
+ "step": 4709
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.975085138130938e-05,
+ "loss": 0.1757,
+ "step": 4710
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.971387552413361e-05,
+ "loss": 0.1794,
+ "step": 4711
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.967690255931667e-05,
+ "loss": 0.1823,
+ "step": 4712
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.963993249213017e-05,
+ "loss": 0.1753,
+ "step": 4713
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 7.960296532784515e-05,
+ "loss": 0.1684,
+ "step": 4714
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.956600107173233e-05,
+ "loss": 0.1693,
+ "step": 4715
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.952903972906204e-05,
+ "loss": 0.1835,
+ "step": 4716
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.949208130510417e-05,
+ "loss": 0.1808,
+ "step": 4717
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.945512580512813e-05,
+ "loss": 0.183,
+ "step": 4718
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.941817323440302e-05,
+ "loss": 0.1765,
+ "step": 4719
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.938122359819746e-05,
+ "loss": 0.1776,
+ "step": 4720
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.934427690177965e-05,
+ "loss": 0.1898,
+ "step": 4721
+ },
+ {
+ "epoch": 5.59,
+ "learning_rate": 7.930733315041739e-05,
+ "loss": 0.187,
+ "step": 4722
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.927039234937804e-05,
+ "loss": 0.1785,
+ "step": 4723
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.923345450392856e-05,
+ "loss": 0.1768,
+ "step": 4724
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.919651961933553e-05,
+ "loss": 0.1766,
+ "step": 4725
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.915958770086498e-05,
+ "loss": 0.1743,
+ "step": 4726
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.912265875378262e-05,
+ "loss": 0.1763,
+ "step": 4727
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.908573278335371e-05,
+ "loss": 0.1819,
+ "step": 4728
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.904880979484315e-05,
+ "loss": 0.1785,
+ "step": 4729
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 7.901188979351526e-05,
+ "loss": 0.1717,
+ "step": 4730
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.897497278463409e-05,
+ "loss": 0.1737,
+ "step": 4731
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.893805877346316e-05,
+ "loss": 0.1755,
+ "step": 4732
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.890114776526564e-05,
+ "loss": 0.1739,
+ "step": 4733
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.88642397653042e-05,
+ "loss": 0.1754,
+ "step": 4734
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.882733477884115e-05,
+ "loss": 0.1772,
+ "step": 4735
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.87904328111383e-05,
+ "loss": 0.1818,
+ "step": 4736
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.875353386745713e-05,
+ "loss": 0.1781,
+ "step": 4737
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.871663795305855e-05,
+ "loss": 0.1714,
+ "step": 4738
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 7.867974507320311e-05,
+ "loss": 0.1952,
+ "step": 4739
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.864285523315096e-05,
+ "loss": 0.1803,
+ "step": 4740
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.860596843816187e-05,
+ "loss": 0.1843,
+ "step": 4741
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.856908469349495e-05,
+ "loss": 0.1907,
+ "step": 4742
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.853220400440907e-05,
+ "loss": 0.1848,
+ "step": 4743
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.849532637616264e-05,
+ "loss": 0.1849,
+ "step": 4744
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.845845181401358e-05,
+ "loss": 0.1878,
+ "step": 4745
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.84215803232194e-05,
+ "loss": 0.1849,
+ "step": 4746
+ },
+ {
+ "epoch": 5.62,
+ "learning_rate": 7.838471190903717e-05,
+ "loss": 0.1778,
+ "step": 4747
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.834784657672353e-05,
+ "loss": 0.1925,
+ "step": 4748
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.831098433153467e-05,
+ "loss": 0.18,
+ "step": 4749
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.827412517872634e-05,
+ "loss": 0.1823,
+ "step": 4750
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.823726912355384e-05,
+ "loss": 0.1821,
+ "step": 4751
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.820041617127205e-05,
+ "loss": 0.1732,
+ "step": 4752
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.816356632713545e-05,
+ "loss": 0.1758,
+ "step": 4753
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.812671959639791e-05,
+ "loss": 0.1701,
+ "step": 4754
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 7.808987598431303e-05,
+ "loss": 0.1795,
+ "step": 4755
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.805303549613392e-05,
+ "loss": 0.1732,
+ "step": 4756
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.80161981371132e-05,
+ "loss": 0.1791,
+ "step": 4757
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.797936391250314e-05,
+ "loss": 0.2083,
+ "step": 4758
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.794253282755541e-05,
+ "loss": 0.1894,
+ "step": 4759
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.790570488752135e-05,
+ "loss": 0.1774,
+ "step": 4760
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.786888009765185e-05,
+ "loss": 0.1696,
+ "step": 4761
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.78320584631973e-05,
+ "loss": 0.1693,
+ "step": 4762
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.779523998940766e-05,
+ "loss": 0.1744,
+ "step": 4763
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 7.775842468153242e-05,
+ "loss": 0.176,
+ "step": 4764
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.772161254482068e-05,
+ "loss": 0.1683,
+ "step": 4765
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.768480358452107e-05,
+ "loss": 0.1765,
+ "step": 4766
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.764799780588164e-05,
+ "loss": 0.1836,
+ "step": 4767
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.761119521415016e-05,
+ "loss": 0.1803,
+ "step": 4768
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.757439581457388e-05,
+ "loss": 0.1745,
+ "step": 4769
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.753759961239964e-05,
+ "loss": 0.1705,
+ "step": 4770
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.750080661287366e-05,
+ "loss": 0.1818,
+ "step": 4771
+ },
+ {
+ "epoch": 5.65,
+ "learning_rate": 7.74640168212419e-05,
+ "loss": 0.1837,
+ "step": 4772
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.742723024274974e-05,
+ "loss": 0.1956,
+ "step": 4773
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.73904468826422e-05,
+ "loss": 0.1773,
+ "step": 4774
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.735366674616372e-05,
+ "loss": 0.1798,
+ "step": 4775
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.731688983855838e-05,
+ "loss": 0.1771,
+ "step": 4776
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.728011616506976e-05,
+ "loss": 0.1868,
+ "step": 4777
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.7243345730941e-05,
+ "loss": 0.1735,
+ "step": 4778
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.720657854141475e-05,
+ "loss": 0.1727,
+ "step": 4779
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 7.716981460173319e-05,
+ "loss": 0.1735,
+ "step": 4780
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.713305391713806e-05,
+ "loss": 0.1809,
+ "step": 4781
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.709629649287068e-05,
+ "loss": 0.2148,
+ "step": 4782
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.705954233417179e-05,
+ "loss": 0.1791,
+ "step": 4783
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.702279144628174e-05,
+ "loss": 0.1848,
+ "step": 4784
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.698604383444044e-05,
+ "loss": 0.1745,
+ "step": 4785
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.694929950388732e-05,
+ "loss": 0.1702,
+ "step": 4786
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.691255845986124e-05,
+ "loss": 0.1764,
+ "step": 4787
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.687582070760073e-05,
+ "loss": 0.171,
+ "step": 4788
+ },
+ {
+ "epoch": 5.67,
+ "learning_rate": 7.683908625234376e-05,
+ "loss": 0.1764,
+ "step": 4789
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.68023550993279e-05,
+ "loss": 0.1831,
+ "step": 4790
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.676562725379018e-05,
+ "loss": 0.1914,
+ "step": 4791
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.67289027209672e-05,
+ "loss": 0.1752,
+ "step": 4792
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.669218150609507e-05,
+ "loss": 0.1706,
+ "step": 4793
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.66554636144095e-05,
+ "loss": 0.176,
+ "step": 4794
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.661874905114554e-05,
+ "loss": 0.1786,
+ "step": 4795
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.658203782153793e-05,
+ "loss": 0.1759,
+ "step": 4796
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 7.654532993082092e-05,
+ "loss": 0.1718,
+ "step": 4797
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.650862538422831e-05,
+ "loss": 0.1704,
+ "step": 4798
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.647192418699324e-05,
+ "loss": 0.1814,
+ "step": 4799
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.643522634434856e-05,
+ "loss": 0.1762,
+ "step": 4800
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.639853186152659e-05,
+ "loss": 0.1777,
+ "step": 4801
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.636184074375917e-05,
+ "loss": 0.1763,
+ "step": 4802
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.632515299627763e-05,
+ "loss": 0.1798,
+ "step": 4803
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.628846862431283e-05,
+ "loss": 0.1874,
+ "step": 4804
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 7.625178763309519e-05,
+ "loss": 0.1853,
+ "step": 4805
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.621511002785467e-05,
+ "loss": 0.1894,
+ "step": 4806
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.617843581382055e-05,
+ "loss": 0.1831,
+ "step": 4807
+ },
+ {
+ "epoch": 5.7,
+ "eval_loss": 3.838498115539551,
+ "eval_runtime": 283.7734,
+ "eval_samples_per_second": 0.726,
+ "eval_steps_per_second": 0.726,
+ "step": 4807
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.614176499622189e-05,
+ "loss": 0.1784,
+ "step": 4808
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.610509758028712e-05,
+ "loss": 0.1802,
+ "step": 4809
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.606843357124426e-05,
+ "loss": 0.173,
+ "step": 4810
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.603177297432069e-05,
+ "loss": 0.187,
+ "step": 4811
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.59951157947435e-05,
+ "loss": 0.1684,
+ "step": 4812
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.595846203773916e-05,
+ "loss": 0.1786,
+ "step": 4813
+ },
+ {
+ "epoch": 5.7,
+ "learning_rate": 7.592181170853373e-05,
+ "loss": 0.171,
+ "step": 4814
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.588516481235271e-05,
+ "loss": 0.1713,
+ "step": 4815
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.584852135442118e-05,
+ "loss": 0.1772,
+ "step": 4816
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.581188133996368e-05,
+ "loss": 0.1921,
+ "step": 4817
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.57752447742043e-05,
+ "loss": 0.172,
+ "step": 4818
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.573861166236658e-05,
+ "loss": 0.1688,
+ "step": 4819
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.570198200967362e-05,
+ "loss": 0.1703,
+ "step": 4820
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.5665355821348e-05,
+ "loss": 0.1765,
+ "step": 4821
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 7.562873310261183e-05,
+ "loss": 0.1764,
+ "step": 4822
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.559211385868677e-05,
+ "loss": 0.1923,
+ "step": 4823
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.55554980947938e-05,
+ "loss": 0.1832,
+ "step": 4824
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.551888581615357e-05,
+ "loss": 0.176,
+ "step": 4825
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.548227702798624e-05,
+ "loss": 0.1836,
+ "step": 4826
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.544567173551143e-05,
+ "loss": 0.1821,
+ "step": 4827
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.54090699439482e-05,
+ "loss": 0.1685,
+ "step": 4828
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.537247165851518e-05,
+ "loss": 0.1979,
+ "step": 4829
+ },
+ {
+ "epoch": 5.72,
+ "learning_rate": 7.533587688443049e-05,
+ "loss": 0.1765,
+ "step": 4830
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.529928562691178e-05,
+ "loss": 0.1691,
+ "step": 4831
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.526269789117612e-05,
+ "loss": 0.192,
+ "step": 4832
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.522611368244016e-05,
+ "loss": 0.178,
+ "step": 4833
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.518953300591997e-05,
+ "loss": 0.1803,
+ "step": 4834
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.515295586683122e-05,
+ "loss": 0.1909,
+ "step": 4835
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.511638227038894e-05,
+ "loss": 0.1912,
+ "step": 4836
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.507981222180776e-05,
+ "loss": 0.172,
+ "step": 4837
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.504324572630177e-05,
+ "loss": 0.1784,
+ "step": 4838
+ },
+ {
+ "epoch": 5.73,
+ "learning_rate": 7.500668278908461e-05,
+ "loss": 0.1769,
+ "step": 4839
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.497012341536924e-05,
+ "loss": 0.171,
+ "step": 4840
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.493356761036829e-05,
+ "loss": 0.1712,
+ "step": 4841
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.489701537929384e-05,
+ "loss": 0.1866,
+ "step": 4842
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.486046672735743e-05,
+ "loss": 0.1782,
+ "step": 4843
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.482392165977008e-05,
+ "loss": 0.1797,
+ "step": 4844
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.478738018174234e-05,
+ "loss": 0.1795,
+ "step": 4845
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.47508422984842e-05,
+ "loss": 0.1756,
+ "step": 4846
+ },
+ {
+ "epoch": 5.74,
+ "learning_rate": 7.471430801520522e-05,
+ "loss": 0.1957,
+ "step": 4847
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.467777733711434e-05,
+ "loss": 0.1782,
+ "step": 4848
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.464125026942003e-05,
+ "loss": 0.1986,
+ "step": 4849
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.460472681733031e-05,
+ "loss": 0.1792,
+ "step": 4850
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.456820698605263e-05,
+ "loss": 0.1784,
+ "step": 4851
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.453169078079382e-05,
+ "loss": 0.1795,
+ "step": 4852
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.44951782067604e-05,
+ "loss": 0.1843,
+ "step": 4853
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.445866926915818e-05,
+ "loss": 0.1772,
+ "step": 4854
+ },
+ {
+ "epoch": 5.75,
+ "learning_rate": 7.442216397319266e-05,
+ "loss": 0.1709,
+ "step": 4855
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.438566232406858e-05,
+ "loss": 0.1707,
+ "step": 4856
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.434916432699033e-05,
+ "loss": 0.1753,
+ "step": 4857
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.431266998716171e-05,
+ "loss": 0.1781,
+ "step": 4858
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.427617930978606e-05,
+ "loss": 0.1829,
+ "step": 4859
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.423969230006609e-05,
+ "loss": 0.1949,
+ "step": 4860
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.42032089632041e-05,
+ "loss": 0.1678,
+ "step": 4861
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.41667293044018e-05,
+ "loss": 0.1919,
+ "step": 4862
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.413025332886044e-05,
+ "loss": 0.1812,
+ "step": 4863
+ },
+ {
+ "epoch": 5.76,
+ "learning_rate": 7.409378104178059e-05,
+ "loss": 0.1851,
+ "step": 4864
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.40573124483625e-05,
+ "loss": 0.1688,
+ "step": 4865
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.402084755380574e-05,
+ "loss": 0.2043,
+ "step": 4866
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.398438636330948e-05,
+ "loss": 0.1748,
+ "step": 4867
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.394792888207221e-05,
+ "loss": 0.1867,
+ "step": 4868
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.391147511529202e-05,
+ "loss": 0.1745,
+ "step": 4869
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.387502506816638e-05,
+ "loss": 0.1715,
+ "step": 4870
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.383857874589232e-05,
+ "loss": 0.1789,
+ "step": 4871
+ },
+ {
+ "epoch": 5.77,
+ "learning_rate": 7.380213615366627e-05,
+ "loss": 0.1724,
+ "step": 4872
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.376569729668413e-05,
+ "loss": 0.181,
+ "step": 4873
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.372926218014131e-05,
+ "loss": 0.1782,
+ "step": 4874
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.369283080923269e-05,
+ "loss": 0.1729,
+ "step": 4875
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.36564031891525e-05,
+ "loss": 0.1832,
+ "step": 4876
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.361997932509461e-05,
+ "loss": 0.1864,
+ "step": 4877
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.358355922225222e-05,
+ "loss": 0.1905,
+ "step": 4878
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.35471428858181e-05,
+ "loss": 0.1911,
+ "step": 4879
+ },
+ {
+ "epoch": 5.78,
+ "learning_rate": 7.351073032098437e-05,
+ "loss": 0.1809,
+ "step": 4880
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.347432153294265e-05,
+ "loss": 0.1757,
+ "step": 4881
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.34379165268841e-05,
+ "loss": 0.1751,
+ "step": 4882
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.340151530799926e-05,
+ "loss": 0.1772,
+ "step": 4883
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.336511788147811e-05,
+ "loss": 0.1763,
+ "step": 4884
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.332872425251018e-05,
+ "loss": 0.1721,
+ "step": 4885
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.329233442628437e-05,
+ "loss": 0.1848,
+ "step": 4886
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.325594840798911e-05,
+ "loss": 0.1752,
+ "step": 4887
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.321956620281223e-05,
+ "loss": 0.1905,
+ "step": 4888
+ },
+ {
+ "epoch": 5.79,
+ "learning_rate": 7.318318781594106e-05,
+ "loss": 0.1787,
+ "step": 4889
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.314681325256232e-05,
+ "loss": 0.1739,
+ "step": 4890
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.311044251786227e-05,
+ "loss": 0.171,
+ "step": 4891
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.307407561702662e-05,
+ "loss": 0.1822,
+ "step": 4892
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.303771255524038e-05,
+ "loss": 0.1766,
+ "step": 4893
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.300135333768821e-05,
+ "loss": 0.1742,
+ "step": 4894
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.296499796955414e-05,
+ "loss": 0.1805,
+ "step": 4895
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.292864645602169e-05,
+ "loss": 0.1762,
+ "step": 4896
+ },
+ {
+ "epoch": 5.8,
+ "learning_rate": 7.28922988022737e-05,
+ "loss": 0.1718,
+ "step": 4897
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.285595501349258e-05,
+ "loss": 0.175,
+ "step": 4898
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.28196150948602e-05,
+ "loss": 0.1846,
+ "step": 4899
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.278327905155783e-05,
+ "loss": 0.1742,
+ "step": 4900
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.274694688876616e-05,
+ "loss": 0.179,
+ "step": 4901
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.271061861166539e-05,
+ "loss": 0.1791,
+ "step": 4902
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.267429422543514e-05,
+ "loss": 0.1721,
+ "step": 4903
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.263797373525451e-05,
+ "loss": 0.1768,
+ "step": 4904
+ },
+ {
+ "epoch": 5.81,
+ "learning_rate": 7.260165714630195e-05,
+ "loss": 0.1778,
+ "step": 4905
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.256534446375542e-05,
+ "loss": 0.1717,
+ "step": 4906
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.252903569279235e-05,
+ "loss": 0.1813,
+ "step": 4907
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.249273083858961e-05,
+ "loss": 0.1715,
+ "step": 4908
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.245642990632338e-05,
+ "loss": 0.1691,
+ "step": 4909
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.242013290116944e-05,
+ "loss": 0.1732,
+ "step": 4910
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.238383982830292e-05,
+ "loss": 0.1861,
+ "step": 4911
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.23475506928985e-05,
+ "loss": 0.2166,
+ "step": 4912
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.231126550013015e-05,
+ "loss": 0.1835,
+ "step": 4913
+ },
+ {
+ "epoch": 5.82,
+ "learning_rate": 7.227498425517134e-05,
+ "loss": 0.1663,
+ "step": 4914
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.2238706963195e-05,
+ "loss": 0.1761,
+ "step": 4915
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.220243362937352e-05,
+ "loss": 0.1767,
+ "step": 4916
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.216616425887863e-05,
+ "loss": 0.1745,
+ "step": 4917
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.212989885688157e-05,
+ "loss": 0.1798,
+ "step": 4918
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.209363742855302e-05,
+ "loss": 0.1746,
+ "step": 4919
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.205737997906307e-05,
+ "loss": 0.1923,
+ "step": 4920
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.202112651358117e-05,
+ "loss": 0.189,
+ "step": 4921
+ },
+ {
+ "epoch": 5.83,
+ "learning_rate": 7.198487703727632e-05,
+ "loss": 0.1773,
+ "step": 4922
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.194863155531692e-05,
+ "loss": 0.1739,
+ "step": 4923
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.191239007287081e-05,
+ "loss": 0.1778,
+ "step": 4924
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.187615259510516e-05,
+ "loss": 0.1753,
+ "step": 4925
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.183991912718669e-05,
+ "loss": 0.1735,
+ "step": 4926
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.180368967428149e-05,
+ "loss": 0.1783,
+ "step": 4927
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.176746424155512e-05,
+ "loss": 0.1808,
+ "step": 4928
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.173124283417247e-05,
+ "loss": 0.1768,
+ "step": 4929
+ },
+ {
+ "epoch": 5.84,
+ "learning_rate": 7.169502545729797e-05,
+ "loss": 0.1766,
+ "step": 4930
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.165881211609543e-05,
+ "loss": 0.2007,
+ "step": 4931
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.162260281572808e-05,
+ "loss": 0.1848,
+ "step": 4932
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.158639756135854e-05,
+ "loss": 0.1824,
+ "step": 4933
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.155019635814894e-05,
+ "loss": 0.1754,
+ "step": 4934
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.151399921126075e-05,
+ "loss": 0.1852,
+ "step": 4935
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.147780612585495e-05,
+ "loss": 0.1667,
+ "step": 4936
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.144161710709178e-05,
+ "loss": 0.1794,
+ "step": 4937
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.14054321601311e-05,
+ "loss": 0.1644,
+ "step": 4938
+ },
+ {
+ "epoch": 5.85,
+ "learning_rate": 7.136925129013203e-05,
+ "loss": 0.1897,
+ "step": 4939
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.133307450225322e-05,
+ "loss": 0.1778,
+ "step": 4940
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.129690180165266e-05,
+ "loss": 0.1793,
+ "step": 4941
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.12607331934878e-05,
+ "loss": 0.1799,
+ "step": 4942
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.122456868291548e-05,
+ "loss": 0.1688,
+ "step": 4943
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.118840827509201e-05,
+ "loss": 0.1713,
+ "step": 4944
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.115225197517304e-05,
+ "loss": 0.1786,
+ "step": 4945
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.111609978831367e-05,
+ "loss": 0.1736,
+ "step": 4946
+ },
+ {
+ "epoch": 5.86,
+ "learning_rate": 7.107995171966842e-05,
+ "loss": 0.1861,
+ "step": 4947
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.104380777439127e-05,
+ "loss": 0.1745,
+ "step": 4948
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.100766795763546e-05,
+ "loss": 0.1896,
+ "step": 4949
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.097153227455379e-05,
+ "loss": 0.1737,
+ "step": 4950
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.093540073029839e-05,
+ "loss": 0.2093,
+ "step": 4951
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.089927333002086e-05,
+ "loss": 0.1994,
+ "step": 4952
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.086315007887225e-05,
+ "loss": 0.1794,
+ "step": 4953
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.082703098200282e-05,
+ "loss": 0.1797,
+ "step": 4954
+ },
+ {
+ "epoch": 5.87,
+ "learning_rate": 7.079091604456241e-05,
+ "loss": 0.166,
+ "step": 4955
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.075480527170024e-05,
+ "loss": 0.1787,
+ "step": 4956
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.071869866856493e-05,
+ "loss": 0.1801,
+ "step": 4957
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.068259624030444e-05,
+ "loss": 0.1689,
+ "step": 4958
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.064649799206625e-05,
+ "loss": 0.1789,
+ "step": 4959
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.061040392899712e-05,
+ "loss": 0.174,
+ "step": 4960
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.057431405624335e-05,
+ "loss": 0.1745,
+ "step": 4961
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.053822837895051e-05,
+ "loss": 0.1763,
+ "step": 4962
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.050214690226364e-05,
+ "loss": 0.1777,
+ "step": 4963
+ },
+ {
+ "epoch": 5.88,
+ "learning_rate": 7.04660696313272e-05,
+ "loss": 0.1846,
+ "step": 4964
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.042999657128503e-05,
+ "loss": 0.1842,
+ "step": 4965
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.03939277272803e-05,
+ "loss": 0.1839,
+ "step": 4966
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.035786310445567e-05,
+ "loss": 0.1716,
+ "step": 4967
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.032180270795317e-05,
+ "loss": 0.2148,
+ "step": 4968
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.028574654291426e-05,
+ "loss": 0.1774,
+ "step": 4969
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.024969461447972e-05,
+ "loss": 0.1759,
+ "step": 4970
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.021364692778977e-05,
+ "loss": 0.1776,
+ "step": 4971
+ },
+ {
+ "epoch": 5.89,
+ "learning_rate": 7.017760348798403e-05,
+ "loss": 0.1946,
+ "step": 4972
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 7.014156430020153e-05,
+ "loss": 0.1739,
+ "step": 4973
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 7.010552936958066e-05,
+ "loss": 0.1983,
+ "step": 4974
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 7.006949870125919e-05,
+ "loss": 0.1863,
+ "step": 4975
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 7.003347230037433e-05,
+ "loss": 0.1755,
+ "step": 4976
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 6.99974501720627e-05,
+ "loss": 0.1776,
+ "step": 4977
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 6.99614323214602e-05,
+ "loss": 0.1922,
+ "step": 4978
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 6.992541875370217e-05,
+ "loss": 0.1898,
+ "step": 4979
+ },
+ {
+ "epoch": 5.9,
+ "learning_rate": 6.988940947392344e-05,
+ "loss": 0.1818,
+ "step": 4980
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.985340448725815e-05,
+ "loss": 0.1911,
+ "step": 4981
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.981740379883974e-05,
+ "loss": 0.1688,
+ "step": 4982
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.97814074138012e-05,
+ "loss": 0.1917,
+ "step": 4983
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.974541533727476e-05,
+ "loss": 0.1944,
+ "step": 4984
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.97094275743922e-05,
+ "loss": 0.187,
+ "step": 4985
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.967344413028452e-05,
+ "loss": 0.1714,
+ "step": 4986
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.963746501008217e-05,
+ "loss": 0.1714,
+ "step": 4987
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.9601490218915e-05,
+ "loss": 0.1814,
+ "step": 4988
+ },
+ {
+ "epoch": 5.91,
+ "learning_rate": 6.95655197619123e-05,
+ "loss": 0.1749,
+ "step": 4989
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.952955364420255e-05,
+ "loss": 0.1793,
+ "step": 4990
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.94935918709138e-05,
+ "loss": 0.17,
+ "step": 4991
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.945763444717341e-05,
+ "loss": 0.1717,
+ "step": 4992
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.942168137810818e-05,
+ "loss": 0.1876,
+ "step": 4993
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.938573266884413e-05,
+ "loss": 0.1788,
+ "step": 4994
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.93497883245068e-05,
+ "loss": 0.1773,
+ "step": 4995
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.931384835022109e-05,
+ "loss": 0.1843,
+ "step": 4996
+ },
+ {
+ "epoch": 5.92,
+ "learning_rate": 6.927791275111126e-05,
+ "loss": 0.1758,
+ "step": 4997
+ },
+ {
+ "epoch": 5.93,
+ "learning_rate": 6.924198153230091e-05,
+ "loss": 0.1831,
+ "step": 4998
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 8330,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 833,
+ "total_flos": 1.7518364490599498e+19,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-4998/trainer_state.json:com.dropbox.attrs b/checkpoint-4998/trainer_state.json:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..b506010066e65e7ae68ae7b46ab0441f9d3f17fa
Binary files /dev/null and b/checkpoint-4998/trainer_state.json:com.dropbox.attrs differ
diff --git a/checkpoint-4998/training_args.bin b/checkpoint-4998/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207
--- /dev/null
+++ b/checkpoint-4998/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869
+size 4859
diff --git a/checkpoint-4998/training_args.bin:com.dropbox.attrs b/checkpoint-4998/training_args.bin:com.dropbox.attrs
new file mode 100644
index 0000000000000000000000000000000000000000..2d1f4ba5a9acc961b9eae1552fe318689b93101f
Binary files /dev/null and b/checkpoint-4998/training_args.bin:com.dropbox.attrs differ
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..de37db9e3fa375d521fc198b3dbfa62a616402c5
--- /dev/null
+++ b/config.json
@@ -0,0 +1,42 @@
+{
+ "_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
+ "architectures": [
+ "MixtralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mixtral",
+ "num_attention_heads": 32,
+ "num_experts_per_tok": 2,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "num_local_experts": 8,
+ "output_router_logits": true,
+ "quantization_config": {
+ "bnb_4bit_compute_dtype": "bfloat16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_enable_fp32_cpu_offload": false,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_skip_modules": null,
+ "llm_int8_threshold": 6.0,
+ "load_in_4bit": true,
+ "load_in_8bit": false,
+ "quant_method": "bitsandbytes"
+ },
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "router_aux_loss_coef": 0.02,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.37.0.dev0",
+ "use_cache": false,
+ "vocab_size": 32000
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbe629867f4501804df6190873dea2329db091b6
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "trust_remote_code": true,
+ "unk_token": "",
+ "use_default_system_prompt": false,
+ "use_fast": true
+}